framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,16,1,0,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,128,1,0,0.3767
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,64,1,0,0.3019
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1024,1,0,1.4708
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,256,1,0,0.5165
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,512,1,0,0.8244
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,32,1,0,0.2779
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1536,1,0,2.1247
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,0,0.2237
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,2048,1,0,2.8285
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,3072,1,0,4.2433
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,4096,1,0,5.7360
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,6144,1,0,8.5273
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,8192,1,0,11.5200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,10240,1,0,14.8775
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,12288,1,0,17.7671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,0,0.2297
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,16,1,0,0.2742
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,32,1,0,0.3000
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,64,1,0,0.3756
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,128,1,0,0.5370
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,16384,1,0,24.4188
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,256,1,0,0.8209
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,512,1,0,1.4940
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1024,1,0,2.9223
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1536,1,0,4.1498
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,32768,1,0,47.2712
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,2048,1,0,5.4912
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,3072,1,0,8.3912
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,4096,1,0,11.3051
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,6144,1,0,17.4017
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,8192,1,0,23.6131
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,0,0.2584
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,16,1,0,0.3268
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,10240,1,0,29.9117
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,32,1,0,0.4008
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,12288,1,0,36.7035
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,64,1,0,0.5404
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,128,1,0,0.8466
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,256,1,0,1.4872
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,16384,1,0,43.9456
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,512,1,0,2.9626
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1024,1,0,5.4439
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1536,1,0,8.0588
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,2048,1,0,10.9348
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,3072,1,0,16.7596
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,4096,1,0,22.6227
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,32768,1,0,95.7462
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,6144,1,0,36.5492
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,0,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,8192,1,0,40.7161
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,16,1,0,0.3990
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,32,1,0,0.5438
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,64,1,0,0.8469
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,10240,1,0,53.2515
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,128,1,0,1.6055
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,256,1,0,2.7936
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,12288,1,0,65.5014
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,512,1,0,5.4523
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1024,1,0,10.6013
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1536,1,0,16.3993
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,16384,1,0,90.1882
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,2048,1,0,21.5270
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,3072,1,0,34.0815
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,4096,1,0,40.1788
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,0,0.2793
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,16,1,0,0.5440
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,6144,1,0,63.6166
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,32,1,0,0.8474
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,64,1,0,1.4814
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,128,1,0,2.7926
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,8192,1,0,85.5587
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,256,1,0,5.4000
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,32768,1,0,198.6670
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,512,1,0,10.5592
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,10240,1,0,110.0707
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1024,1,0,21.5760
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1536,1,0,32.3585
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,12288,1,0,134.4017
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,2048,1,0,37.2937
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,0,0.2973
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,16,1,0,0.8489
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,32,1,0,1.4871
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,3072,1,0,60.6777
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,64,1,0,2.7938
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,16384,1,0,180.4201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,128,1,0,5.8680
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,4096,1,0,81.1189
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,256,1,0,10.5202
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,512,1,0,20.8935
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1024,1,0,36.9277
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,6144,1,0,123.9797
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,16,1,0,1.4856
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1536,1,0,56.9437
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,32,1,0,2.7857
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,64,1,0,5.3968
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,2048,1,0,76.2790
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,128,1,0,10.5565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,8192,1,0,175.0365
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,0,0.3268
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,256,1,0,20.8980
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,3072,1,0,118.1161
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,512,1,0,35.4066
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,16,1,0,2.7864
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,32,1,0,5.3738
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,64,1,0,10.5384
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,4096,1,0,166.8220
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,128,1,0,20.8934
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1024,1,0,78.1048
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,0,0.4010
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,16,1,0,5.3974
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,256,1,0,34.8631
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1536,1,0,114.5619
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,32,1,0,10.5258
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,64,1,0,22.9742
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,512,1,0,72.6451
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,128,1,0,34.5208
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,2048,1,0,155.4469
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,0,0.5395
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,0,0.2257
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,16,1,0,0.2512
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,32,1,0,0.2641
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,64,1,0,0.2872
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,128,1,0,0.3477
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,256,1,0,0.4808
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,512,1,0,0.6852
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1024,1,0,1.2064
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1536,1,0,1.7108
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,256,1,0,72.2609
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,2048,1,0,2.2401
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,3072,1,0,3.2665
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,4096,1,0,4.3408
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,6144,1,0,6.6532
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1024,1,0,149.4255
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,8192,1,0,9.1492
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,10240,1,0,11.4714
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,12288,1,0,13.9428
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,0,0.2279
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,16384,1,0,19.9747
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,16,1,0,0.2642
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,32,1,0,0.2855
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,64,1,0,0.3419
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,128,1,0,0.4566
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,256,1,0,0.6805
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,512,1,0,1.1982
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1024,1,0,2.2141
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1536,1,0,3.1999
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,32768,1,0,42.4948
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,2048,1,0,4.2351
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,3072,1,0,6.3805
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,4096,1,0,8.6603
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,6144,1,0,13.1020
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,512,1,0,146.6411
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,8192,1,0,18.1253
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,0,0.2374
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,10240,1,0,23.6049
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,12288,1,0,28.1000
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,16,1,0,0.2882
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,32,1,0,0.3461
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,64,1,0,0.4567
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,128,1,0,0.6791
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,256,1,0,1.1941
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,16384,1,0,40.0405
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,512,1,0,2.1949
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1024,1,0,4.1726
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1536,1,0,6.4978
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,2048,1,0,8.2724
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,3072,1,0,12.8238
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,4096,1,0,17.5392
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,32768,1,0,79.7966
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,6144,1,0,27.3925
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,0,0.2463
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,8192,1,0,37.5175
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,16,1,0,0.3435
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,32,1,0,0.4550
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,10240,1,0,47.9589
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,64,1,0,0.6814
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,128,1,0,1.1892
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,256,1,0,2.1926
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,12288,1,0,59.5777
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,512,1,0,4.1430
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1024,1,0,8.1654
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,16384,1,0,74.4993
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1536,1,0,12.4652
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,2048,1,0,16.5412
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,3072,1,0,25.8333
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,4096,1,0,36.3697
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,0,0.2479
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,16,1,0,0.4596
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,32,1,0,0.6807
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,6144,1,0,56.3909
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,8192,1,0,70.2394
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,64,1,0,1.3165
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,128,1,0,2.1841
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,256,1,0,4.1345
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,32768,1,0,162.4195
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,10240,1,0,88.7204
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,512,1,0,8.1514
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1024,1,0,16.2819
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,12288,1,0,109.7651
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1536,1,0,24.6926
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,2048,1,0,33.7850
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,0,0.2609
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,16,1,0,0.6850
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,3072,1,0,53.4753
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,32,1,0,1.1872
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,64,1,0,2.1868
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,128,1,0,4.1178
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,16384,1,0,150.7921
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,4096,1,0,66.5946
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,256,1,0,8.1559
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,512,1,0,16.0675
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1024,1,0,33.4134
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,0,0.2809
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,6144,1,0,104.2095
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,16,1,0,1.1954
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1536,1,0,49.6982
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,32,1,0,2.1835
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,64,1,0,4.1301
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,2048,1,0,62.1250
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,128,1,0,8.1459
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,8192,1,0,142.4167
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,256,1,0,16.0277
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,0,0.3409
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,512,1,0,32.2103
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,3072,1,0,99.4000
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,16,1,0,2.1785
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,32,1,0,4.1132
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,64,1,0,8.1114
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1024,1,0,63.4751
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,4096,1,0,134.7401
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,128,1,0,15.9658
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,256,1,0,31.8183
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,0,0.4564
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,16,1,0,4.1092
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1536,1,0,92.8482
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,32,1,0,8.1147
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,0,0.2011
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,64,1,0,15.9597
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,512,1,0,59.4929
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,2048,1,0,128.0921
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,16,1,0,0.2374
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,32,1,0,0.2429
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,128,1,0,35.9772
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,64,1,0,0.2568
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,128,1,0,0.3087
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,512,1,0,0.5741
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,256,1,0,0.4018
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1024,1,0,0.9608
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1536,1,0,1.3692
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,256,1,0,59.0672
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,2048,1,0,1.7618
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,3072,1,0,2.5870
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,4096,1,0,3.4112
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,6144,1,0,5.1982
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1024,1,0,122.5276
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,8192,1,0,6.9472
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,10240,1,0,8.8770
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,12288,1,0,10.7806
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,0,0.2136
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,16,1,0,0.2414
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,32,1,0,0.2564
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,16384,1,0,15.4296
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,64,1,0,0.3102
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,512,1,0,119.1998
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,128,1,0,0.4010
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,256,1,0,0.6282
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,512,1,0,0.9481
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,32768,1,0,34.6472
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1024,1,0,1.7338
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1536,1,0,2.5031
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,2048,1,0,3.2841
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,3072,1,0,4.9454
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,4096,1,0,6.7599
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,6144,1,0,10.1581
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,8192,1,0,14.1842
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,10240,1,0,18.8443
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,0,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,16,1,0,0.2735
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,12288,1,0,22.7779
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,32,1,0,0.3089
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,64,1,0,0.4006
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,16384,1,0,31.8452
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,128,1,0,0.5698
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,256,1,0,0.9481
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,512,1,0,1.7279
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1024,1,0,3.2251
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1536,1,0,4.8217
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,2048,1,0,6.4360
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,3072,1,0,10.1085
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,4096,1,0,13.8203
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,32768,1,0,71.1745
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,6144,1,0,20.4939
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,0,0.2275
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,8192,1,0,29.8691
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,16,1,0,0.3111
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,32,1,0,0.4004
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,10240,1,0,38.8020
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,64,1,0,0.5707
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,128,1,0,0.9452
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,256,1,0,1.7176
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,12288,1,0,46.5510
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,512,1,0,3.2218
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1024,1,0,6.2752
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1536,1,0,9.4924
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,16384,1,0,64.3874
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,2048,1,0,13.4561
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,3072,1,0,19.9420
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,4096,1,0,27.6896
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,0,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,16,1,0,0.4030
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,6144,1,0,43.6016
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,32,1,0,0.5701
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,64,1,0,0.9450
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,128,1,0,1.7130
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,8192,1,0,60.8065
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,32768,1,0,137.0663
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,256,1,0,3.2156
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,512,1,0,6.2411
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,10240,1,0,78.9610
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1024,1,0,12.3755
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,12288,1,0,96.9771
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1536,1,0,19.8604
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,2048,1,0,26.5803
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,0,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,16,1,0,0.5703
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,3072,1,0,41.1886
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,32,1,0,0.9462
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,16384,1,0,124.5887
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,64,1,0,1.7124
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,128,1,0,3.1896
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,256,1,0,6.2254
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,4096,1,0,58.1612
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,512,1,0,12.3947
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1024,1,0,27.3498
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,0,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,6144,1,0,90.1100
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1536,1,0,39.0062
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,16,1,0,0.9465
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,32,1,0,1.7140
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,2048,1,0,53.0850
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,64,1,0,3.1911
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,8192,1,0,115.9311
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,128,1,0,7.2331
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,256,1,0,12.2854
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,0,0.3082
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,512,1,0,24.7206
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,3072,1,0,84.9768
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,16,1,0,1.7133
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,32,1,0,3.7100
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,4096,1,0,107.9808
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,64,1,0,6.2397
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1024,1,0,51.1990
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,128,1,0,12.2769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,256,1,0,24.3567
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,0,0.3997
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,16,1,0,3.1819
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,32,1,0,6.2082
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1536,1,0,79.6457
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,64,1,0,12.2756
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,2048,1,0,99.9437
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,0,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,512,1,0,49.3848
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,128,1,0,24.3585
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,16,1,0,0.2172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,32,1,0,0.2179
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,64,1,0,0.2498
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,128,1,0,0.3049
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,256,1,0,0.3731
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,512,1,0,0.5179
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1024,1,0,0.8407
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,256,1,0,49.2885
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1536,1,0,1.1755
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,2048,1,0,1.5154
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,3072,1,0,2.2237
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1024,1,0,98.6586
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,4096,1,0,2.9262
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,6144,1,0,4.5316
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,8192,1,0,6.2429
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,10240,1,0,7.7902
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,12288,1,0,9.5006
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,0,0.1974
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,16384,1,0,13.3639
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,512,1,0,94.0644
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,16,1,0,0.2297
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,32,1,0,0.2506
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,64,1,0,0.2920
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,128,1,0,0.3745
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,256,1,0,0.5170
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,512,1,0,0.8355
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,32768,1,0,30.2710
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1024,1,0,1.4825
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1536,1,0,2.2958
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,2048,1,0,2.8263
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,3072,1,0,4.2415
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,4096,1,0,5.9335
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,6144,1,0,8.9900
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,8192,1,0,12.4134
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,10240,1,0,16.1922
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,0,0.1961
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,16,1,0,0.2484
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,32,1,0,0.2919
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,12288,1,0,20.2942
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,64,1,0,0.3716
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,128,1,0,0.5166
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,16384,1,0,27.5117
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,256,1,0,0.8328
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,512,1,0,1.4775
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1024,1,0,2.7918
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1536,1,0,4.1156
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,2048,1,0,5.4710
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,3072,1,0,8.7866
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,4096,1,0,11.7548
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,32768,1,0,63.3763
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,6144,1,0,19.4258
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,8192,1,0,25.0935
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,0,0.2084
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,16,1,0,0.2927
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,32,1,0,0.3748
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,10240,1,0,33.2218
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,64,1,0,0.5171
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,12288,1,0,41.1011
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,128,1,0,0.8288
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,256,1,0,1.4684
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,512,1,0,2.7705
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1024,1,0,5.3692
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,16384,1,0,57.3657
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1536,1,0,8.1758
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,2048,1,0,11.0595
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,3072,1,0,18.1855
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,4096,1,0,24.1487
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,0,0.2194
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,16,1,0,0.3745
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,6144,1,0,39.5173
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,32,1,0,0.5132
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,8192,1,0,53.1915
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,64,1,0,0.8297
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,128,1,0,1.4723
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,32768,1,0,127.6464
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,256,1,0,2.7341
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,10240,1,0,67.4732
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,512,1,0,5.3346
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1024,1,0,10.5552
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,12288,1,0,82.8047
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1536,1,0,16.5051
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,2048,1,0,24.1265
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,0,0.2200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,16,1,0,0.5189
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,3072,1,0,35.7890
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,32,1,0,0.8317
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,16384,1,0,115.1477
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,64,1,0,1.4637
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,128,1,0,2.7459
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,4096,1,0,49.2246
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,256,1,0,5.2977
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,512,1,0,12.2199
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,6144,1,0,77.5123
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1024,1,0,21.0243
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,0,0.2461
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1536,1,0,33.2459
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,16,1,0,0.8312
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,32,1,0,1.4677
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,2048,1,0,46.8015
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,64,1,0,3.2541
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,8192,1,0,106.9553
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,128,1,0,5.2916
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,256,1,0,10.4788
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,3072,1,0,72.4067
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,0,0.2879
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,512,1,0,21.1684
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,16,1,0,1.4687
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,32,1,0,2.7339
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,64,1,0,5.2894
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,4096,1,0,100.3840
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1024,1,0,44.2233
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,128,1,0,10.4471
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,256,1,0,20.8256
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,0,0.3685
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1536,1,0,67.0933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,16,1,0,2.7366
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,32,1,0,5.2835
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,512,1,0,42.0786
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,64,1,0,10.4196
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,0,0.1749
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,2048,1,0,91.7723
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,128,1,0,20.7052
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,16,1,0,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,32,1,0,0.2174
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,64,1,0,0.2265
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,128,1,0,0.2800
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,256,1,0,0.3573
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,256,1,0,41.5948
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,512,1,0,0.4942
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1024,1,0,0.7885
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1536,1,0,1.0897
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1024,1,0,87.6071
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,2048,1,0,1.3952
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,3072,1,0,2.0451
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,4096,1,0,2.6949
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,6144,1,0,4.2315
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,8192,1,0,5.8188
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,10240,1,0,7.2037
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,12288,1,0,8.7374
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,16384,1,0,12.3495
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,0,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,16,1,0,0.2147
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,32,1,0,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,64,1,0,0.2804
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,512,1,0,98.1029
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,128,1,0,0.3608
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,256,1,0,0.4899
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,512,1,0,0.7820
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,32768,1,0,29.0240
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1024,1,0,1.3740
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1536,1,0,1.9821
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,2048,1,0,2.5871
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,3072,1,0,4.0000
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,4096,1,0,5.2970
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,6144,1,0,8.3690
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,8192,1,0,11.5262
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,10240,1,0,14.9786
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,16,1,0,0.2404
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,0,0.1918
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,32,1,0,0.2815
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,12288,1,0,18.4055
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,64,1,0,0.3601
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,128,1,0,0.4880
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,16384,1,0,25.9390
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,256,1,0,0.7784
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,512,1,0,1.3644
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1024,1,0,2.5608
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1536,1,0,3.7686
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,2048,1,0,5.1005
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,3072,1,0,7.8589
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,32768,1,0,59.8685
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,4096,1,0,11.1281
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,6144,1,0,17.2948
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,8192,1,0,23.9811
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,0,0.1928
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,16,1,0,0.2797
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,10240,1,0,31.4825
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,32,1,0,0.3622
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,12288,1,0,38.8450
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,64,1,0,0.4910
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,128,1,0,0.7756
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,256,1,0,1.3568
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,512,1,0,2.5425
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1024,1,0,5.0123
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,16384,1,0,53.5965
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1536,1,0,7.3997
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,2048,1,0,10.0938
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,3072,1,0,16.1128
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,4096,1,0,22.5536
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,0,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,6144,1,0,36.5204
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,16,1,0,0.3607
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,32,1,0,0.4905
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,8192,1,0,49.8744
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,64,1,0,0.7765
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,32768,1,0,119.9337
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,128,1,0,1.3508
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,10240,1,0,62.9677
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,256,1,0,2.5149
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,512,1,0,4.8812
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1024,1,0,11.0015
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,12288,1,0,77.7795
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1536,1,0,15.0674
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,2048,1,0,20.7642
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,0,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,16,1,0,0.4918
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,16384,1,0,107.7731
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,32,1,0,0.7780
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,3072,1,0,33.3362
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,64,1,0,1.3446
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,128,1,0,2.5139
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,4096,1,0,46.0051
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,256,1,0,4.8914
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,512,1,0,9.6009
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1024,1,0,19.7653
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,6144,1,0,71.4696
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,0,0.2376
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1536,1,0,31.2025
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,16,1,0,0.7803
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,32,1,0,1.3476
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,2048,1,0,42.5109
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,64,1,0,2.5029
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,8192,1,0,99.0003
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,128,1,0,4.8377
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,256,1,0,9.5827
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,3072,1,0,66.4746
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,512,1,0,19.4415
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,0,0.2763
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,16,1,0,1.3520
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,32,1,0,2.5003
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,64,1,0,4.8399
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1024,1,0,40.1703
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,4096,1,0,92.5918
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,128,1,0,9.5092
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,256,1,0,18.9937
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1536,1,0,61.6269
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,0,0.3583
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,16,1,0,2.4976
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,32,1,0,4.8379
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,512,1,0,38.2181
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,64,1,0,9.5071
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,2048,1,0,88.4232
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,0,0.1673
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,16,1,0,0.2013
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,128,1,0,18.8495
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,32,1,0,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,64,1,0,0.2340
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,128,1,0,0.2721
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,256,1,0,38.0365
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,256,1,0,0.3536
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1024,1,0,79.9099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,512,1,0,0.4787
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1024,1,0,0.7620
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1536,1,0,1.1095
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,2048,1,0,1.3370
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,3072,1,0,1.9591
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,4096,1,0,2.5776
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,6144,1,0,4.0744
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,8192,1,0,5.4347
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,10240,1,0,6.8990
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,512,1,0,78.9016
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,12288,1,0,8.5690
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,16384,1,0,12.0386
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,0,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,16,1,0,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,32,1,0,0.2293
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,64,1,0,0.2760
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,128,1,0,0.3520
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,256,1,0,0.5357
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,512,1,0,0.7517
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,32768,1,0,27.8698
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1024,1,0,1.3179
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1536,1,0,1.8953
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,2048,1,0,2.4875
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,3072,1,0,3.8474
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,4096,1,0,5.0828
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,6144,1,0,8.3295
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,8192,1,0,11.1888
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,10240,1,0,14.3881
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,0,0.1926
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,12288,1,0,18.1357
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,16,1,0,0.2382
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,32,1,0,0.2750
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,16384,1,0,24.9136
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,64,1,0,0.3503
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,128,1,0,0.4756
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,256,1,0,0.7514
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,512,1,0,1.3052
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1024,1,0,2.4485
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1536,1,0,3.6103
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,2048,1,0,4.9716
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,3072,1,0,7.7097
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,32768,1,0,58.6084
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,4096,1,0,10.4628
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,6144,1,0,16.5702
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,8192,1,0,22.8930
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,0,0.1960
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,16,1,0,0.2763
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,10240,1,0,30.2829
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,32,1,0,0.3522
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,64,1,0,0.4769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,12288,1,0,37.7016
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,128,1,0,0.7485
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,256,1,0,1.2980
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,512,1,0,2.4145
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1024,1,0,4.7307
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,16384,1,0,51.5338
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1536,1,0,7.0021
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,2048,1,0,9.8421
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,3072,1,0,15.9125
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,4096,1,0,20.6856
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,0,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,16,1,0,0.3497
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,6144,1,0,34.2580
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,32,1,0,0.4790
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,8192,1,0,48.3096
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,64,1,0,0.7495
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,32768,1,0,115.6683
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,128,1,0,1.2946
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,10240,1,0,60.4192
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,256,1,0,2.4110
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,512,1,0,4.6569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1024,1,0,9.3614
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1536,1,0,14.1202
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,12288,1,0,74.0172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,2048,1,0,19.7393
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,0,0.2172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,16,1,0,0.4810
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,32,1,0,0.7500
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,16384,1,0,103.1391
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,3072,1,0,32.1453
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,64,1,0,1.2979
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,128,1,0,2.3952
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,4096,1,0,44.3405
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,256,1,0,4.6515
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,512,1,0,9.1657
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,6144,1,0,68.5511
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1024,1,0,19.2677
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,0,0.2339
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1536,1,0,28.9333
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,16,1,0,0.7530
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,32,1,0,1.2933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,8192,1,0,96.4652
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,2048,1,0,40.3350
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,64,1,0,2.3919
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,128,1,0,4.6212
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,256,1,0,9.0945
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,3072,1,0,65.0135
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,512,1,0,18.2675
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,0,0.2728
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,16,1,0,1.2945
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,32,1,0,2.3901
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,4096,1,0,88.6393
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,64,1,0,4.6254
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1024,1,0,37.9129
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,128,1,0,9.0714
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,0,0.3517
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,256,1,0,17.9823
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1536,1,0,58.8873
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,16,1,0,2.9291
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,32,1,0,4.6153
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,2048,1,0,80.5712
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,512,1,0,37.5325
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,64,1,0,9.0712
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,0,0.1572
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,128,1,0,17.9493
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,16,1,0,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,32,1,0,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,64,1,0,0.2320
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,128,1,0,0.2609
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,256,1,0,36.0037
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,256,1,0,0.3471
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,512,1,0,0.4710
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1024,1,0,0.7494
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1536,1,0,1.0304
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,2048,1,0,1.3112
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,3072,1,0,1.9162
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,4096,1,0,2.5324
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1024,1,0,76.5531
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,6144,1,0,3.8434
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,512,1,0,74.0189
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,8192,1,0,5.4580
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,10240,1,0,6.8989
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,12288,1,0,8.3986
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,16384,1,0,11.7059
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,0,0.1770
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,16,1,0,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,64,1,0,0.2730
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,32,1,0,0.2317
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,128,1,0,0.3495
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,256,1,0,0.4676
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,32768,1,0,27.3734
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1024,1,0,1.2932
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,512,1,0,0.7441
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1536,1,0,1.8634
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,2048,1,0,2.4376
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,3072,1,0,3.8386
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,4096,1,0,5.0955
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,6144,1,0,7.9355
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,8192,1,0,11.1036
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,0,0.1895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,16,1,0,0.2424
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,10240,1,0,13.9051
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,12288,1,0,17.4921
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,32,1,0,0.2571
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,64,1,0,0.3507
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,16384,1,0,24.3283
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,128,1,0,0.4697
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,256,1,0,0.7380
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,512,1,0,1.2859
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1024,1,0,2.3804
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1536,1,0,3.5212
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,2048,1,0,4.7650
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,3072,1,0,7.4969
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,4096,1,0,10.3521
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,32768,1,0,56.7350
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,6144,1,0,16.2147
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,0,0.1933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,8192,1,0,22.6553
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,16,1,0,0.2728
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,10240,1,0,29.8512
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,32,1,0,0.3470
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,64,1,0,0.4707
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,12288,1,0,36.7193
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,128,1,0,0.7371
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,256,1,0,1.2789
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,512,1,0,2.3622
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1024,1,0,4.5768
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,16384,1,0,50.0614
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1536,1,0,7.0345
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,2048,1,0,9.5758
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,3072,1,0,15.1331
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,4096,1,0,21.6646
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,0,0.1965
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,6144,1,0,33.6176
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,16,1,0,0.3503
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,32,1,0,0.4688
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,8192,1,0,46.6619
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,64,1,0,0.7368
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,32768,1,0,114.3438
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,128,1,0,1.2735
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,10240,1,0,59.9539
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,256,1,0,2.3348
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,512,1,0,4.5553
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1024,1,0,9.1121
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,12288,1,0,73.9707
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1536,1,0,13.8645
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,2048,1,0,19.1984
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,0,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,16,1,0,0.4714
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,16384,1,0,100.0538
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,3072,1,0,31.4421
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,32,1,0,0.7388
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,4096,1,0,43.5103
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,64,1,0,1.2729
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,128,1,0,2.3418
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,256,1,0,4.5555
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,512,1,0,8.9374
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,6144,1,0,67.7961
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1024,1,0,18.1671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,0,0.2234
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1536,1,0,28.5708
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,32,1,0,1.2706
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,16,1,0,0.7382
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,2048,1,0,39.8711
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,64,1,0,2.3394
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,8192,1,0,92.9989
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,128,1,0,4.4979
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,256,1,0,8.9583
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,3072,1,0,62.6021
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,512,1,0,17.6626
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,0,0.2686
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,16,1,0,1.2717
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,32,1,0,2.3354
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,64,1,0,4.5074
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,4096,1,0,86.5728
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1024,1,0,36.9621
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,128,1,0,8.8494
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1536,1,0,58.4731
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,0,0.3452
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,256,1,0,17.6577
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,16,1,0,2.3392
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,32,1,0,4.5016
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,2048,1,0,77.9455
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,512,1,0,35.4594
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,64,1,0,8.8763
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,0,0.1628
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,128,1,0,17.5360
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,16,1,0,0.1952
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,32,1,0,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,128,1,0,0.2525
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,64,1,0,0.2267
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,256,1,0,35.1230
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,256,1,0,0.3446
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,512,1,0,0.4665
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1024,1,0,74.0087
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1024,1,0,0.7428
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1536,1,0,1.0145
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,2048,1,0,1.3046
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,3072,1,0,1.9063
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,4096,1,0,2.4981
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,6144,1,0,3.7645
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,512,1,0,72.2128
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,10240,1,0,6.8160
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,8192,1,0,5.3164
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,12288,1,0,8.2963
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,0,0.1726
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,16384,1,0,11.8185
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,16,1,0,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,32,1,0,0.2246
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,64,1,0,0.2573
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,256,1,0,0.4649
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,512,1,0,0.7343
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,128,1,0,0.3408
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,32768,1,0,27.3763
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1024,1,0,1.2838
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1536,1,0,1.8401
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,2048,1,0,2.4057
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,3072,1,0,3.5933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,4096,1,0,4.8658
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,6144,1,0,7.6960
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,8192,1,0,10.6669
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,10240,1,0,14.2263
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,12288,1,0,17.5866
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,0,0.1775
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,16,1,0,0.2236
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,32,1,0,0.2618
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,64,1,0,0.3408
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,16384,1,0,24.0271
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,128,1,0,0.4637
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,256,1,0,0.7318
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,512,1,0,1.2663
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1024,1,0,2.3626
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1536,1,0,3.4730
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,2048,1,0,4.6126
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,3072,1,0,7.4711
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,4096,1,0,10.1496
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,32768,1,0,56.3025
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,6144,1,0,16.3357
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,8192,1,0,22.5861
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,0,0.1891
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,16,1,0,0.2596
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,10240,1,0,29.6059
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,32,1,0,0.3438
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,64,1,0,0.4635
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,12288,1,0,35.9658
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,128,1,0,0.7314
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,256,1,0,1.2566
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,512,1,0,2.3460
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1024,1,0,4.5636
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,16384,1,0,50.0708
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1536,1,0,6.7706
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,2048,1,0,9.3714
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,3072,1,0,15.0539
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,4096,1,0,20.3894
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,0,0.1993
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,6144,1,0,33.3268
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,16,1,0,0.3458
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,32,1,0,0.4648
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,8192,1,0,46.5469
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,32768,1,0,112.5419
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,64,1,0,0.7318
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,128,1,0,1.2532
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,10240,1,0,59.7834
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,256,1,0,2.3356
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,512,1,0,4.5070
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1024,1,0,9.0135
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,12288,1,0,72.2102
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1536,1,0,13.6843
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,2048,1,0,19.6072
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,0,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,16,1,0,0.4671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,16384,1,0,99.5646
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,3072,1,0,30.9667
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,32,1,0,0.7318
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,64,1,0,1.2579
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,4096,1,0,43.0559
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,128,1,0,2.3192
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,256,1,0,4.4532
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,512,1,0,8.7950
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1024,1,0,18.1504
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,6144,1,0,67.9127
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,0,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1536,1,0,28.0933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,16,1,0,0.7317
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,32,1,0,1.5274
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,2048,1,0,39.0944
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,64,1,0,2.3142
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,8192,1,0,92.1937
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,128,1,0,4.4501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,256,1,0,8.8513
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,3072,1,0,62.5162
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,0,0.2639
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,512,1,0,17.8039
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,16,1,0,1.2568
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,32,1,0,2.3043
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,64,1,0,4.4529
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,4096,1,0,86.4910
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1024,1,0,36.2804
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,128,1,0,10.9095
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,256,1,0,17.4878
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1536,1,0,56.4489
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,0,0.3398
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,16,1,0,2.3112
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,32,1,0,4.4529
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,2048,1,0,77.4640
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,512,1,0,35.3371
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,64,1,0,8.7533
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,1,1,0,0.2258
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,128,1,0,17.3010
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,256,1,0,34.5573
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,32,1,0,0.3235
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,16,1,0,0.2972
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,128,1,0,0.4114
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,64,1,0,0.3776
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1024,1,0,73.2555
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,256,1,0,0.5588
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,512,1,0,0.8903
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,1024,1,0,1.5806
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,1536,1,0,2.2813
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,2048,1,0,3.0080
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,512,1,0,70.7946
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,3072,1,0,4.4517
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,4096,1,0,6.1540
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,6144,1,0,9.0304
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,8192,1,0,12.5806
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,1,1,0,0.2396
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,16,1,0,0.3214
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,10240,1,0,15.7107
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,32,1,0,0.3708
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,12288,1,0,19.4842
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,64,1,0,0.4103
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,16384,1,0,26.4862
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,128,1,0,0.5569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,256,1,0,0.8818
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,512,1,0,1.5572
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,1024,1,0,2.9153
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,1536,1,0,4.3168
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,1,32768,1,0,50.0533
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,2048,1,0,5.9491
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,3072,1,0,8.7665
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,4096,1,0,12.1557
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,6144,1,0,18.2717
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,1,1,0,0.2567
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,8192,1,0,25.2287
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,16,1,0,0.3779
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,10240,1,0,31.9472
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,32,1,0,0.4127
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,12288,1,0,38.5439
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,64,1,0,0.5560
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,128,1,0,0.8741
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,256,1,0,1.5371
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,512,1,0,2.8991
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,1024,1,0,5.8322
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,16384,1,0,46.6197
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,1536,1,0,8.4458
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,2048,1,0,11.6392
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,3072,1,0,18.0099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,4096,1,0,24.0407
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,6144,1,0,37.3646
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,2,32768,1,0,99.8109
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,1,1,0,0.2682
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,8192,1,0,44.0681
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,16,1,0,0.4105
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,10240,1,0,56.4302
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,32,1,0,0.5538
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,64,1,0,0.8707
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,128,1,0,1.5345
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,256,1,0,2.8927
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,12288,1,0,69.8486
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,512,1,0,5.7729
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,1024,1,0,11.3547
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,1536,1,0,17.2147
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,16384,1,0,94.1707
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,2048,1,0,23.0754
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,3072,1,0,35.8480
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,4096,1,0,42.7960
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,1,1,0,0.2796
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,16,1,0,0.5554
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,6144,1,0,64.4065
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,32,1,0,0.8717
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,64,1,0,1.5365
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,128,1,0,2.8739
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,8192,1,0,89.6071
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,256,1,0,5.7619
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,512,1,0,11.3687
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,4,32768,1,0,200.5459
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,10240,1,0,114.7559
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,1024,1,0,22.4277
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,1536,1,0,34.3131
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,2048,1,0,40.1250
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,12288,1,0,142.1301
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,1,1,0,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,16,1,0,0.8747
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,32,1,0,1.5376
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,64,1,0,2.8780
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,3072,1,0,64.1185
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,128,1,0,5.7527
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,8,16384,1,0,188.8788
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,256,1,0,11.2622
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,4096,1,0,86.8419
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,512,1,0,22.2729
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,1,1,0,0.3629
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,1024,1,0,40.2200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,6144,1,0,129.5707
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,16,1,0,1.5412
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,32,1,0,2.8822
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,1536,1,0,60.0771
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,64,1,0,5.7541
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,2048,1,0,80.0279
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,128,1,0,11.2763
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,256,1,0,22.4093
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,16,8192,1,0,180.3889
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,512,1,0,37.9238
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,128,1,1,0,0.4582
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,128,16,1,0,2.8735
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,3072,1,0,127.6006
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,128,32,1,0,6.2949
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,128,64,1,0,11.2546
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,128,128,1,0,22.2058
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,1024,1,0,79.9161
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,128,256,1,0,38.7316
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,32,4096,1,0,174.0930
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,256,1,1,0,0.6942
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,256,16,1,0,5.7592
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,1536,1,0,119.6477
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,256,32,1,0,11.2523
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,256,64,1,0,22.2135
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,128,512,1,0,79.3211
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,1,1,0,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,256,128,1,0,37.4852
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,16,1,0,0.2616
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,64,2048,1,0,168.5590
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,32,1,0,0.2895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,64,1,0,0.3402
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,128,1,0,0.3583
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,256,1,0,0.4813
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,512,1,0,0.7312
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,1024,1,0,1.3104
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,1536,1,0,1.8751
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,256,256,1,0,77.0639
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,2048,1,0,2.4619
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,3072,1,0,3.5882
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,4096,1,0,4.7445
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,6144,1,0,7.2381
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,8192,1,0,9.6564
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,128,1024,1,0,156.7775
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,10240,1,0,12.3132
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,1,1,0,0.2038
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,16,1,0,0.2897
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,12288,1,0,15.1599
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,16384,1,0,20.3615
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,64,1,0,0.3587
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,32,1,0,0.3409
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,128,1,0,0.4764
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,256,1,0,0.7193
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,512,1,0,1.2896
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,1024,1,0,2.3888
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,1536,1,0,3.4518
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,1,32768,1,0,44.9019
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,2048,1,0,4.5746
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,3072,1,0,6.9258
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,4096,1,0,9.3346
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,128,256,512,1,0,157.4832
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,6144,1,0,14.4683
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,1,1,0,0.2107
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,8192,1,0,19.9605
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,16,1,0,0.3394
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,10240,1,0,24.9351
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,32,1,0,0.3573
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,64,1,0,0.4783
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,12288,1,0,30.0902
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,128,1,0,0.7121
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,256,1,0,1.2769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,16384,1,0,41.5638
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,512,1,0,2.3612
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,1024,1,0,4.4862
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,1536,1,0,6.6628
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,2048,1,0,8.8947
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,3072,1,0,13.8035
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,4096,1,0,19.4245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,2,32768,1,0,84.2961
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,6144,1,0,28.6912
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,1,1,0,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,8192,1,0,40.0958
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,16,1,0,0.3554
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,32,1,0,0.4765
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,10240,1,0,50.3336
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,64,1,0,0.7848
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,128,1,0,1.2687
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,256,1,0,2.3470
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,12288,1,0,60.8670
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,512,1,0,4.4479
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,1024,1,0,8.7465
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,1536,1,0,13.2773
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,16384,1,0,77.0147
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,2048,1,0,17.6555
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,3072,1,0,27.6683
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,4096,1,0,38.0938
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,1,1,0,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,16,1,0,0.4783
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,6144,1,0,59.7632
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,32,1,0,0.7127
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,64,1,0,1.2665
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,128,1,0,2.3385
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,8192,1,0,73.3986
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,256,1,0,4.4438
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,4,32768,1,0,173.1125
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,512,1,0,8.6960
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,10240,1,0,97.0900
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,1024,1,0,17.2681
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,1536,1,0,26.3422
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,12288,1,0,114.8236
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,2048,1,0,36.0561
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,1,1,0,0.2425
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,16,1,0,0.7143
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,32,1,0,1.2661
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,64,1,0,2.3341
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,3072,1,0,56.5317
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,128,1,0,4.4093
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,8,16384,1,0,160.0473
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,256,1,0,8.6697
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,4096,1,0,70.7809
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,512,1,0,17.1708
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,1,1,0,0.2714
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,1024,1,0,34.8450
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,6144,1,0,111.4951
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,16,1,0,1.2680
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,1536,1,0,53.3200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,32,1,0,2.3287
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,2048,1,0,65.7578
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,64,1,0,4.4368
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,128,1,0,8.6805
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,16,8192,1,0,150.5865
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,256,1,0,17.1288
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,1,1,0,0.3204
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,3072,1,0,102.8797
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,512,1,0,34.4187
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,16,1,0,2.3397
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,32,1,0,4.4124
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,64,1,0,8.6722
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,128,1,0,17.0932
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,1024,1,0,64.2567
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,32,4096,1,0,144.6319
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,256,1,0,34.3100
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,256,1,1,0,0.4489
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,256,16,1,0,4.4283
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,1536,1,0,97.8750
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,256,32,1,0,9.7405
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,256,64,1,0,17.0991
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,1,1,0,0.1769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,512,1,0,62.4624
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,256,128,1,0,34.0835
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,16,1,0,0.2446
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,32,1,0,0.2685
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,64,2048,1,0,133.3206
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,64,1,0,0.3077
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,128,1,0,0.3191
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,256,1,0,0.4175
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,512,1,0,0.6038
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,1024,1,0,1.0269
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,1536,1,0,1.5245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,256,256,1,0,62.7721
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,2048,1,0,1.9508
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,3072,1,0,2.7660
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,4096,1,0,3.6610
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,6144,1,0,5.5677
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,8192,1,0,7.5138
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,128,1024,1,0,129.1064
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,10240,1,0,9.7183
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,1,1,0,0.1872
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,12288,1,0,11.6463
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,16384,1,0,15.9585
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,16,1,0,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,32,1,0,0.3089
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,64,1,0,0.3212
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,128,1,0,0.4155
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,256,1,0,0.6417
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,1,32768,1,0,36.2353
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,1024,1,0,1.8377
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,512,1,0,1.0034
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,1536,1,0,2.6609
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,64,256,512,1,0,127.1220
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,2048,1,0,3.4902
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,3072,1,0,5.2597
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,4096,1,0,7.1903
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,6144,1,0,11.0462
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,8192,1,0,15.2197
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,1,1,0,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,16,1,0,0.3041
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,10240,1,0,19.9183
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,12288,1,0,23.8048
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,32,1,0,0.3172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,64,1,0,0.4139
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,128,1,0,0.6544
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,256,1,0,0.9848
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,16384,1,0,32.1329
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,512,1,0,1.8102
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,1024,1,0,3.4164
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,1536,1,0,5.0799
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,2048,1,0,6.7361
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,3072,1,0,10.3527
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,4096,1,0,14.2670
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,2,32768,1,0,73.6658
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,6144,1,0,22.5485
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,8192,1,0,30.4347
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,1,1,0,0.2000
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,32,1,0,0.4147
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,10240,1,0,39.5497
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,64,1,0,0.5852
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,128,1,0,0.9796
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,16,1,0,0.3192
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,12288,1,0,48.5733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,256,1,0,1.7987
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,512,1,0,3.3760
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,1024,1,0,6.5800
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,1536,1,0,9.9609
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,16384,1,0,66.4833
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,2048,1,0,13.4218
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,3072,1,0,21.2003
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,4096,1,0,29.5950
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,1,1,0,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,16,1,0,0.4139
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,6144,1,0,46.9568
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,32,1,0,0.5865
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,64,1,0,0.9791
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,128,1,0,1.7857
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,8192,1,0,63.7005
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,4,32768,1,0,140.3085
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,256,1,0,3.3753
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,512,1,0,6.5491
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,10240,1,0,81.8008
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,1024,1,0,13.0126
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,1536,1,0,20.1817
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,12288,1,0,99.3253
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,2048,1,0,28.5575
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,1,1,0,0.2267
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,16,1,0,0.5900
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,3072,1,0,45.4302
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,32,1,0,0.9845
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,64,1,0,1.7886
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,128,1,0,3.3438
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,4096,1,0,59.6132
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,256,1,0,6.5671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,8,16384,1,0,129.3158
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,512,1,0,12.9419
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,1024,1,0,26.8484
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,1,1,0,0.2532
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,6144,1,0,93.8262
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,16,1,0,0.9827
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,1536,1,0,40.2189
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,32,1,0,1.7872
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,64,1,0,3.3323
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,16,8192,1,0,119.4041
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,2048,1,0,55.3352
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,128,1,0,6.4977
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,256,1,0,14.8234
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,128,1,1,0,0.3015
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,512,1,0,26.1347
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,3072,1,0,87.9736
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,128,16,1,0,1.7870
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,128,32,1,0,3.3295
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,128,64,1,0,6.5100
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,1024,1,0,53.4321
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,128,128,1,0,12.8700
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,32,4096,1,0,116.3441
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,256,1,1,0,0.4133
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,128,256,1,0,29.5565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,256,16,1,0,3.3331
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,256,32,1,0,6.4911
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,1536,1,0,80.4866
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,256,64,1,0,12.8590
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,128,512,1,0,52.1327
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,1,1,0,0.1624
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,64,2048,1,0,103.8144
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,256,128,1,0,25.6545
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,16,1,0,0.2333
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,32,1,0,0.2525
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,64,1,0,0.2915
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,128,1,0,0.3012
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,256,1,0,0.4097
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,512,1,0,0.5394
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,1024,1,0,0.8822
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,256,256,1,0,51.7808
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,1536,1,0,1.2517
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,2048,1,0,1.6199
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,128,1024,1,0,100.2716
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,3072,1,0,2.3894
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,4096,1,0,3.1369
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,6144,1,0,4.7403
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,8192,1,0,6.4649
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,10240,1,0,8.2692
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,12288,1,0,10.1294
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,1,1,0,0.1741
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,16,1,0,0.2547
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,16384,1,0,14.3444
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,32,1,0,0.2906
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,64,1,0,0.2999
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,32,256,512,1,0,98.3661
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,128,1,0,0.3816
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,256,1,0,0.5341
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,1,32768,1,0,31.8121
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,512,1,0,0.8676
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,1024,1,0,1.5565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,1536,1,0,2.2607
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,2048,1,0,2.9664
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,3072,1,0,4.4544
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,4096,1,0,6.1358
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,6144,1,0,9.3522
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,8192,1,0,12.8866
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,10240,1,0,16.4992
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,1,1,0,0.1841
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,16,1,0,0.2936
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,12288,1,0,20.2488
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,32,1,0,0.2975
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,16384,1,0,28.5835
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,64,1,0,0.3824
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,128,1,0,0.5276
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,256,1,0,0.8576
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,512,1,0,1.5255
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,1024,1,0,2.8725
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,1536,1,0,4.2713
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,2048,1,0,5.6611
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,3072,1,0,8.7780
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,4096,1,0,12.2038
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,2,32768,1,0,65.6142
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,6144,1,0,19.7322
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,8192,1,0,26.1885
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,1,1,0,0.1932
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,16,1,0,0.2973
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,32,1,0,0.3836
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,10240,1,0,33.9519
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,64,1,0,0.5273
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,128,1,0,0.8467
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,12288,1,0,42.1015
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,256,1,0,1.7374
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,512,1,0,2.8497
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,1024,1,0,5.5166
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,1536,1,0,8.2734
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,16384,1,0,58.7397
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,2048,1,0,11.1944
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,3072,1,0,18.0450
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,4096,1,0,26.6939
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,1,1,0,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,6144,1,0,40.1314
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,16,1,0,0.3830
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,32,1,0,0.5294
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,8192,1,0,54.6427
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,64,1,0,0.8507
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,128,1,0,1.7486
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,256,1,0,2.8372
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,4,32768,1,0,130.2576
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,512,1,0,5.4648
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,10240,1,0,69.9848
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,1024,1,0,10.8127
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,1536,1,0,17.2943
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,12288,1,0,85.8953
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,2048,1,0,23.9495
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,1,1,0,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,16,1,0,0.5298
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,3072,1,0,37.2582
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,32,1,0,0.8512
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,8,16384,1,0,120.0642
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,64,1,0,1.5024
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,128,1,0,2.8069
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,4096,1,0,50.4372
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,256,1,0,5.5048
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,512,1,0,10.7618
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,1,1,0,0.2404
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,1024,1,0,24.6387
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,6144,1,0,79.9190
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,1536,1,0,34.8543
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,16,1,0,0.8497
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,32,1,0,1.5049
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,64,1,0,2.8053
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,2048,1,0,47.0121
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,16,8192,1,0,109.4685
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,128,1,0,5.4210
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,256,1,0,10.7530
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,128,1,1,0,0.2813
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,3072,1,0,73.5748
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,128,16,1,0,1.5079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,512,1,0,24.9488
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,128,32,1,0,3.3867
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,1024,1,0,44.3895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,128,64,1,0,5.4372
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,32,4096,1,0,102.7112
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,128,128,1,0,10.7060
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,256,1,1,0,0.3878
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,128,256,1,0,21.4830
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,1536,1,0,68.8928
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,256,16,1,0,2.8081
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,256,32,1,0,5.4334
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,256,64,1,0,10.6998
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,128,512,1,0,43.2420
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,1,1,0,0.1548
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,64,2048,1,0,94.0310
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,16,1,0,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,256,128,1,0,25.5696
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,32,1,0,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,64,1,0,0.2862
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,128,1,0,0.2879
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,512,1,0,0.5125
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,256,1,0,0.3735
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,256,256,1,0,42.9433
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,1024,1,0,0.8205
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,1536,1,0,1.1353
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,2048,1,0,1.4660
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,128,1024,1,0,89.9034
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,4096,1,0,2.8569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,3072,1,0,2.1630
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,6144,1,0,4.4327
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,8192,1,0,5.9895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,10240,1,0,7.5877
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,1,1,0,0.1626
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,16,1,0,0.2381
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,32,1,0,0.2833
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,12288,1,0,9.2139
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,16,256,512,1,0,87.9621
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,64,1,0,0.2868
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,16384,1,0,13.1968
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,256,1,0,0.5647
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,128,1,0,0.3687
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,512,1,0,0.8000
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,1024,1,0,1.4128
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,1,32768,1,0,29.9758
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,1536,1,0,2.0613
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,2048,1,0,2.6944
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,3072,1,0,4.0730
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,4096,1,0,5.5827
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,6144,1,0,8.4575
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,1,1,0,0.1815
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,8192,1,0,12.0195
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,10240,1,0,15.2859
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,12288,1,0,18.8002
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,16,1,0,0.2774
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,32,1,0,0.2861
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,16384,1,0,26.5897
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,64,1,0,0.3684
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,256,1,0,0.7916
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,128,1,0,0.5011
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,512,1,0,1.3975
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,1024,1,0,2.6027
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,1536,1,0,4.1868
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,2048,1,0,5.1414
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,3072,1,0,8.2747
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,4096,1,0,11.0765
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,2,32768,1,0,59.8308
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,6144,1,0,17.6936
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,8192,1,0,24.3672
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,1,1,0,0.1814
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,16,1,0,0.2880
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,32,1,0,0.3671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,10240,1,0,31.6446
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,64,1,0,0.5007
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,128,1,0,0.7842
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,256,1,0,1.3762
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,12288,1,0,39.1333
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,512,1,0,2.5820
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,1024,1,0,4.9961
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,16384,1,0,53.9128
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,1536,1,0,7.5128
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,2048,1,0,10.4573
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,3072,1,0,16.6958
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,4096,1,0,24.6234
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,1,1,0,0.1974
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,6144,1,0,36.7448
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,16,1,0,0.3674
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,32,1,0,0.5012
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,128,1,0,1.3667
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,64,1,0,0.9196
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,8192,1,0,51.0329
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,4,32768,1,0,120.4574
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,256,1,0,2.5536
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,10240,1,0,65.5905
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,512,1,0,4.9592
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,1024,1,0,9.9014
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,12288,1,0,79.6755
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,1536,1,0,15.0961
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,2048,1,0,22.9256
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,1,1,0,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,16,1,0,0.5008
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,3072,1,0,33.9600
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,32,1,0,0.7898
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,8,16384,1,0,110.3890
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,64,1,0,1.3701
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,128,1,0,2.5392
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,256,1,0,4.9786
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,4096,1,0,47.1917
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,512,1,0,11.4294
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,6144,1,0,73.8417
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,1024,1,0,20.2997
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,1,1,0,0.2353
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,1536,1,0,31.5011
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,16,1,0,0.9199
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,32,1,0,1.3668
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,64,1,0,2.5402
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,2048,1,0,44.7192
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,16,8192,1,0,103.1950
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,128,1,0,4.9160
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,256,1,0,9.7042
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,1,1,0,0.2730
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,3072,1,0,68.9160
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,512,1,0,19.9037
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,16,1,0,1.6327
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,32,1,0,3.0769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,1024,1,0,40.5262
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,64,1,0,4.9094
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,32,4096,1,0,97.3253
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,128,1,0,9.6788
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,256,1,1,0,0.3725
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,256,1,0,19.3159
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,1536,1,0,62.6826
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,256,16,1,0,2.5435
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,256,32,1,0,4.9055
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,256,64,1,0,9.6555
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,1,1,0,0.1484
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,512,1,0,40.0757
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,64,2048,1,0,87.3628
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,256,128,1,0,23.3217
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,16,1,0,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,32,1,0,0.2309
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,64,1,0,0.2814
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,128,1,0,0.2783
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,512,1,0,0.4956
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,256,1,0,0.3642
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,1024,1,0,0.7904
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,1536,1,0,1.0908
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,256,256,1,0,46.2489
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,2048,1,0,1.4177
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,128,1024,1,0,82.8136
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,3072,1,0,2.0603
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,4096,1,0,2.7197
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,6144,1,0,4.2362
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,8192,1,0,5.7047
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,10240,1,0,7.2073
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,12288,1,0,8.7430
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,1,1,0,0.1650
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,8,256,512,1,0,80.2927
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,16,1,0,0.2343
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,16384,1,0,12.4375
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,32,1,0,0.2794
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,64,1,0,0.2824
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,128,1,0,0.3592
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,256,1,0,0.4902
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,1024,1,0,1.4962
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,512,1,0,0.7717
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,1,32768,1,0,29.0395
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,1536,1,0,1.9470
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,2048,1,0,2.7030
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,3072,1,0,3.8475
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,4096,1,0,5.1596
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,6144,1,0,8.3713
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,8192,1,0,11.5915
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,1,1,0,0.1757
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,10240,1,0,14.6158
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,16,1,0,0.2794
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,12288,1,0,18.4220
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,32,1,0,0.2794
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,16384,1,0,25.2042
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,64,1,0,0.3612
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,128,1,0,0.5609
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,256,1,0,0.7646
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,512,1,0,1.3231
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,1024,1,0,2.4744
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,1536,1,0,3.6685
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,2048,1,0,4.9145
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,3072,1,0,7.7036
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,4096,1,0,10.5182
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,2,32768,1,0,59.6727
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,6144,1,0,16.8759
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,8192,1,0,22.9127
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,1,1,0,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,16,1,0,0.2865
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,10240,1,0,30.3244
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,32,1,0,0.3616
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,12288,1,0,37.4311
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,64,1,0,0.4847
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,128,1,0,0.7594
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,256,1,0,1.3172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,512,1,0,2.4490
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,1024,1,0,4.7676
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,16384,1,0,51.8211
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,1536,1,0,7.1261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,2048,1,0,9.9790
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,3072,1,0,15.8032
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,4096,1,0,23.7054
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,1,1,0,0.1954
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,6144,1,0,35.3241
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,16,1,0,0.3601
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,32,1,0,0.4844
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,8192,1,0,49.1989
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,64,1,0,0.7583
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,128,1,0,1.5599
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,4,32768,1,0,116.1850
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,256,1,0,2.4373
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,10240,1,0,61.8504
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,512,1,0,4.7258
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,1024,1,0,9.2913
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,12288,1,0,76.2512
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,1536,1,0,14.6474
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,2048,1,0,20.6846
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,1,1,0,0.2043
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,16,1,0,0.5595
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,3072,1,0,32.8242
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,8,16384,1,0,106.4601
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,32,1,0,0.7598
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,64,1,0,1.3079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,128,1,0,2.4172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,4096,1,0,44.7149
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,256,1,0,4.7187
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,512,1,0,9.2888
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,1024,1,0,19.7711
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,6144,1,0,69.9258
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,1,1,0,0.2296
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,1536,1,0,30.1744
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,16,1,0,0.7605
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,32,1,0,1.3101
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,64,1,0,2.4080
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,2048,1,0,43.0033
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,16,8192,1,0,97.5849
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,128,1,0,4.6541
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,256,1,0,9.1443
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,3072,1,0,65.5406
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,512,1,0,19.2613
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,128,1,1,0,0.2652
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,128,16,1,0,1.3070
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,128,32,1,0,2.4112
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,1024,1,0,39.3326
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,128,64,1,0,4.6599
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,32,4096,1,0,89.8723
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,128,128,1,0,9.1420
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,256,1,1,0,0.3671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,1536,1,0,60.2038
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,256,16,1,0,2.4164
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,128,256,1,0,18.1762
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,256,32,1,0,4.6461
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,256,64,1,0,9.1569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,128,512,1,0,37.5304
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,64,2048,1,0,81.8951
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,1,1,0,0.1432
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,256,128,1,0,18.1336
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,16,1,0,0.2086
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,32,1,0,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,64,1,0,0.2612
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,128,1,0,0.2701
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,256,1,0,0.3601
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,512,1,0,0.4892
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,1024,1,0,0.7757
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,256,256,1,0,44.1644
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,1536,1,0,1.0688
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,128,1024,1,0,76.7914
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,2048,1,0,1.3620
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,3072,1,0,1.9926
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,4096,1,0,2.6711
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,6144,1,0,4.0601
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,8192,1,0,5.6191
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,10240,1,0,7.0138
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,12288,1,0,8.7507
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,4,256,512,1,0,74.2224
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,16384,1,0,12.1125
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,1,1,0,0.1586
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,16,1,0,0.2287
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,32,1,0,0.2820
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,64,1,0,0.2698
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,128,1,0,0.3543
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,1,32768,1,0,27.7802
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,256,1,0,0.4819
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,512,1,0,0.7585
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,1024,1,0,1.3256
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,1536,1,0,1.9057
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,2048,1,0,2.4970
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,3072,1,0,3.9167
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,4096,1,0,5.0748
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,6144,1,0,8.1011
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,8192,1,0,11.1578
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,10240,1,0,14.2125
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,1,1,0,0.1653
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,16,1,0,0.2681
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,12288,1,0,17.9176
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,32,1,0,0.2770
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,16384,1,0,24.9971
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,64,1,0,0.3567
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,128,1,0,0.4776
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,256,1,0,0.8684
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,512,1,0,1.3003
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,1024,1,0,2.4375
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,1536,1,0,3.5867
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,2048,1,0,4.8388
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,3072,1,0,7.6230
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,4096,1,0,10.2158
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,2,32768,1,0,59.9650
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,6144,1,0,16.3854
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,8192,1,0,22.8509
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,1,1,0,0.1795
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,16,1,0,0.2691
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,10240,1,0,29.9345
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,32,1,0,0.3775
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,64,1,0,0.4758
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,128,1,0,0.7447
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,12288,1,0,36.8047
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,256,1,0,1.5109
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,512,1,0,2.4032
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,1024,1,0,4.5977
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,16384,1,0,50.7998
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,1536,1,0,6.8697
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,2048,1,0,9.6002
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,3072,1,0,15.6424
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,4096,1,0,22.5694
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,1,1,0,0.1895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,6144,1,0,34.3769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,16,1,0,0.3810
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,32,1,0,0.4785
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,8192,1,0,47.5429
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,64,1,0,0.7444
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,4,32768,1,0,114.4668
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,128,1,0,1.2833
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,256,1,0,2.3708
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,10240,1,0,60.2079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,512,1,0,4.5498
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,1024,1,0,9.0531
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,12288,1,0,74.0944
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,1536,1,0,14.1038
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,2048,1,0,21.6740
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,1,1,0,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,16,1,0,0.4792
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,3072,1,0,31.7708
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,8,16384,1,0,102.9858
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,32,1,0,0.7459
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,64,1,0,1.2759
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,128,1,0,2.3500
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,4096,1,0,44.3819
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,256,1,0,4.5868
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,512,1,0,10.6970
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,6144,1,0,68.2444
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,1024,1,0,19.3216
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,1,1,0,0.2279
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,1536,1,0,29.2846
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,16,1,0,0.7464
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,2048,1,0,39.1549
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,32,1,0,1.2759
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,64,1,0,2.8828
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,16,8192,1,0,93.3889
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,128,1,0,4.5242
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,256,1,0,8.9677
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,3072,1,0,63.3028
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,128,1,1,0,0.2586
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,512,1,0,18.6966
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,128,16,1,0,1.2801
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,128,32,1,0,2.3479
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,128,64,1,0,4.5295
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,1024,1,0,37.7716
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,32,4096,1,0,87.1545
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,128,128,1,0,8.8868
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,128,256,1,0,17.8126
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,1536,1,0,59.0405
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,256,16,1,0,2.3470
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,256,1,1,0,0.3629
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,256,32,1,0,4.5155
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,128,512,1,0,36.3147
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,1,1,0,0.1506
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,64,2048,1,0,79.9373
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,256,64,1,0,8.8980
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,16,1,0,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,256,128,1,0,21.8804
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,32,1,0,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,64,1,0,0.2627
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,128,1,0,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,256,256,1,0,35.3925
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,256,1,0,0.3571
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,512,1,0,0.4846
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,1024,1,0,0.7699
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,128,1024,1,0,74.6895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,1536,1,0,1.0556
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,2048,1,0,1.3542
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,3072,1,0,1.9797
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,4096,1,0,2.6233
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,6144,1,0,3.9525
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,8192,1,0,5.5693
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,10240,1,0,7.0032
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,12288,1,0,8.5290
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,2,256,512,1,0,72.6582
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,1,1,0,0.1585
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,16384,1,0,11.9539
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,16,1,0,0.2261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,32,1,0,0.2760
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,64,1,0,0.2743
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,128,1,0,0.3525
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,512,1,0,0.7509
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,256,1,0,0.4740
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,1024,1,0,1.3052
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,1536,1,0,1.8867
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,1,32768,1,0,27.5675
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,2048,1,0,2.6066
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,3072,1,0,3.7030
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,4096,1,0,5.0196
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,6144,1,0,7.9721
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,8192,1,0,10.7760
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,10240,1,0,14.2943
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,1,1,0,0.1655
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,12288,1,0,17.6751
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,16,1,0,0.2583
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,32,1,0,0.2593
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,16384,1,0,24.2062
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,64,1,0,0.3529
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,128,1,0,0.4730
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,256,1,0,0.7413
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,512,1,0,1.2854
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,1024,1,0,2.3778
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,1536,1,0,3.5111
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,2048,1,0,4.7849
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,3072,1,0,7.3571
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,4096,1,0,10.2133
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,2,32768,1,0,58.5753
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,6144,1,0,15.9468
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,8192,1,0,22.2080
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,1,1,0,0.1748
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,16,1,0,0.2875
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,10240,1,0,29.2744
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,32,1,0,0.3749
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,12288,1,0,35.9426
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,64,1,0,0.4698
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,128,1,0,0.7380
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,256,1,0,1.2732
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,512,1,0,2.3393
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,1024,1,0,4.5477
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,16384,1,0,50.0659
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,1536,1,0,6.9105
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,2048,1,0,9.5369
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,3072,1,0,15.3667
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,4096,1,0,22.7507
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,1,1,0,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,6144,1,0,34.2760
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,16,1,0,0.3523
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,32,1,0,0.4721
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,8192,1,0,47.3070
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,64,1,0,0.7376
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,4,32768,1,0,113.2333
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,128,1,0,1.5123
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,256,1,0,2.3346
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,10240,1,0,60.1939
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,512,1,0,4.5118
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,1024,1,0,8.8771
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,12288,1,0,74.9511
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,1536,1,0,13.8493
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,2048,1,0,20.0933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,1,1,0,0.1917
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,16,1,0,0.4745
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,3072,1,0,31.7470
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,32,1,0,0.8715
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,8,16384,1,0,101.9027
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,64,1,0,1.2580
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,128,1,0,2.3167
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,4096,1,0,43.2501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,256,1,0,4.4826
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,512,1,0,8.8396
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,1024,1,0,19.4583
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,6144,1,0,67.4866
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,1,1,0,0.2233
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,1536,1,0,29.0900
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,16,1,0,0.7397
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,32,1,0,1.2684
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,64,1,0,2.8441
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,2048,1,0,39.7219
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,16,8192,1,0,92.7775
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,128,1,0,4.4497
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,256,1,0,8.8689
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,3072,1,0,62.9009
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,512,1,0,17.9192
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,128,1,1,0,0.2541
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,128,16,1,0,1.2665
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,128,32,1,0,2.3162
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,128,64,1,0,4.4575
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,1024,1,0,36.8114
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,128,128,1,0,8.7763
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,32,4096,1,0,89.5733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,1536,1,0,57.6067
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,128,256,1,0,17.5397
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,256,1,1,0,0.3591
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,256,16,1,0,2.8582
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,256,32,1,0,4.4579
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,128,512,1,0,35.3533
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,64,2048,1,0,78.5478
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,256,64,1,0,8.7601
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,256,128,1,0,17.3666
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,256,256,1,0,34.9169
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,128,1024,1,0,74.6889
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,float16,1,256,512,1,0,71.7814
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,0,0.2583
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,16,1,0,0.2908
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,32,1,0,0.2988
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,64,1,0,0.3265
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,128,1,0,0.4206
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,256,1,0,0.5610
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,512,1,0,0.8397
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1024,1,0,1.4592
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1536,1,0,2.0992
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,2048,1,0,2.7409
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,3072,1,0,4.0848
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,4096,1,0,5.4401
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,6144,1,0,8.0664
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,0,0.2586
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,16,1,0,0.2988
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,8192,1,0,10.7673
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,32,1,0,0.3402
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,64,1,0,0.4042
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,128,1,0,0.5859
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,10240,1,0,13.5138
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,256,1,0,0.8406
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,512,1,0,1.4541
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1024,1,0,2.7144
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1536,1,0,4.0189
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,12288,1,0,16.3748
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,2048,1,0,5.2489
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,3072,1,0,7.8768
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,16384,1,0,22.2142
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,4096,1,0,10.6511
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,6144,1,0,15.8284
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,0,0.2683
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,8192,1,0,21.4595
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,32768,1,0,40.5997
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,10240,1,0,26.8769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,16,1,0,0.3221
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,12288,1,0,32.5349
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,32,1,0,0.4192
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,64,1,0,0.5616
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,16384,1,0,36.9474
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,128,1,0,0.8414
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,256,1,0,1.4500
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,512,1,0,2.9116
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1024,1,0,5.2070
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1536,1,0,7.7523
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,2048,1,0,10.5990
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,3072,1,0,15.4733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,4096,1,0,20.7412
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,32768,1,0,81.0455
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,6144,1,0,31.4960
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,8192,1,0,35.1296
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,0,0.2777
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,16,1,0,0.4088
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,32,1,0,0.5613
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,10240,1,0,44.4895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,64,1,0,0.9197
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,128,1,0,1.4518
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,12288,1,0,54.3335
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,256,1,0,2.7115
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,512,1,0,5.2049
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1024,1,0,10.1882
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,16384,1,0,73.6437
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1536,1,0,15.2008
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,2048,1,0,20.2391
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,3072,1,0,30.7678
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,4096,1,0,34.0421
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,0,0.2853
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,16,1,0,0.5625
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,6144,1,0,51.9030
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,32,1,0,0.8437
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,64,1,0,1.4526
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,8192,1,0,70.0671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,128,1,0,2.6996
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,32768,1,0,159.0079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,256,1,0,5.6458
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,10240,1,0,89.5878
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,512,1,0,10.1875
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,12288,1,0,106.3074
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1024,1,0,20.1035
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1536,1,0,30.1836
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,2048,1,0,33.0134
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,0,0.2941
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,16,1,0,0.8447
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,16384,1,0,144.4772
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,32,1,0,1.4537
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,3072,1,0,50.3197
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,64,1,0,2.9816
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,128,1,0,5.1859
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,4096,1,0,67.8080
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,256,1,0,10.1463
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,512,1,0,20.0257
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1024,1,0,35.5219
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,6144,1,0,101.3773
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,0,0.3187
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1536,1,0,49.1649
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,16,1,0,1.4543
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,32,1,0,2.7060
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,64,1,0,5.1818
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,2048,1,0,65.7548
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,128,1,0,11.2487
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,8192,1,0,137.1930
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,256,1,0,21.9846
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,512,1,0,32.6257
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,0,0.4070
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,3072,1,0,98.2067
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,16,1,0,2.7050
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,32,1,0,5.1919
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,64,1,0,10.1436
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,4096,1,0,132.5920
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1024,1,0,65.1180
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,128,1,0,20.0154
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,0,0.5611
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,256,1,0,36.4737
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,16,1,0,5.1853
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1536,1,0,95.8854
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,32,1,0,10.1345
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,64,1,0,20.0280
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,0,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,512,1,0,64.9445
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,2048,1,0,128.3811
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,128,1,0,32.5304
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,16,1,0,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,32,1,0,0.2760
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,64,1,0,0.2980
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,128,1,0,0.3722
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,256,1,0,0.5000
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,512,1,0,0.7021
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1024,1,0,1.2003
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,256,1,0,64.8014
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1536,1,0,1.7001
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,2048,1,0,2.1996
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,3072,1,0,3.2478
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1024,1,0,127.2772
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,4096,1,0,4.2751
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,6144,1,0,6.4355
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,8192,1,0,8.6134
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,10240,1,0,10.8628
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,12288,1,0,13.1762
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,0,0.2457
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,16,1,0,0.2783
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,16384,1,0,17.9841
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,32,1,0,0.2942
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,64,1,0,0.3619
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,128,1,0,0.4792
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,256,1,0,0.6967
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,512,1,0,1.1955
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,512,1,0,126.8862
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1024,1,0,2.1799
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1536,1,0,3.1689
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,32768,1,0,39.3297
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,2048,1,0,4.1680
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,3072,1,0,6.4170
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,4096,1,0,8.3518
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,6144,1,0,12.6155
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,8192,1,0,17.0491
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,0,0.2504
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,10240,1,0,21.7248
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,16,1,0,0.2958
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,32,1,0,0.3581
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,12288,1,0,26.1514
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,64,1,0,0.5083
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,128,1,0,0.6980
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,16384,1,0,35.6721
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,256,1,0,1.1950
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,512,1,0,2.1667
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1024,1,0,4.1176
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1536,1,0,6.1191
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,2048,1,0,8.1497
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,3072,1,0,12.2598
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,32768,1,0,71.1966
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,4096,1,0,16.4755
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,6144,1,0,25.4311
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,8192,1,0,33.8972
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,0,0.2521
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,16,1,0,0.3607
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,32,1,0,0.4800
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,10240,1,0,42.9141
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,64,1,0,0.6963
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,128,1,0,1.1904
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,256,1,0,2.1647
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,12288,1,0,52.4162
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,512,1,0,4.5077
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,16384,1,0,63.9139
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1024,1,0,8.0549
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1536,1,0,11.9849
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,2048,1,0,15.9910
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,3072,1,0,24.3362
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,4096,1,0,32.7508
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,0,0.2652
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,6144,1,0,50.0043
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,16,1,0,0.5106
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,32,1,0,0.7026
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,32768,1,0,142.4287
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,8192,1,0,60.3284
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,64,1,0,1.1920
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,128,1,0,2.4100
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,256,1,0,4.1042
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,10240,1,0,76.4659
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,512,1,0,8.9189
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,12288,1,0,93.0895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1024,1,0,15.8550
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1536,1,0,23.7780
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,2048,1,0,31.7700
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,0,0.2736
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,16,1,0,0.7013
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,3072,1,0,48.4592
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,32,1,0,1.3231
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,16384,1,0,127.6105
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,64,1,0,2.1660
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,4096,1,0,57.9964
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,128,1,0,4.0964
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,256,1,0,8.0234
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,512,1,0,15.8257
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,6144,1,0,88.7319
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1024,1,0,31.4507
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,0,0.3070
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1536,1,0,47.2572
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,16,1,0,1.1948
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,32,1,0,2.1649
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,64,1,0,4.0895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,2048,1,0,55.9136
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,8192,1,0,120.4759
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,128,1,0,9.1210
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,256,1,0,15.8341
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,3072,1,0,85.6558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,0,0.3573
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,512,1,0,34.9292
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,16,1,0,2.1648
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,32,1,0,4.0856
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,64,1,0,9.1645
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1024,1,0,55.3812
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,4096,1,0,115.8574
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,128,1,0,15.7642
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,0,0.4769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,256,1,0,31.4024
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1536,1,0,83.3088
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,16,1,0,4.0840
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,32,1,0,9.0687
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,64,1,0,15.7555
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,512,1,0,55.1356
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,2048,1,0,111.5296
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,0,0.2123
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,16,1,0,0.2442
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,128,1,0,31.2624
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,32,1,0,0.2630
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,64,1,0,0.2868
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,128,1,0,0.3254
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,256,1,0,0.4226
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,512,1,0,0.5934
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1024,1,0,0.9856
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,256,1,0,55.1048
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1536,1,0,1.3822
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,2048,1,0,1.7734
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1024,1,0,110.6024
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,3072,1,0,2.6007
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,4096,1,0,3.4315
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,8192,1,0,6.8943
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,6144,1,0,5.1572
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,10240,1,0,8.7579
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,12288,1,0,10.5473
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,0,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,16,1,0,0.2617
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,512,1,0,110.1014
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,16384,1,0,14.5772
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,64,1,0,0.3302
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,32,1,0,0.2793
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,128,1,0,0.4209
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,256,1,0,0.5927
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,512,1,0,0.9706
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,32768,1,0,32.6092
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1024,1,0,1.7569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1536,1,0,2.5344
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,2048,1,0,3.4514
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,3072,1,0,4.9547
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,4096,1,0,6.6127
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,6144,1,0,10.0701
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,8192,1,0,13.6388
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,0,0.2297
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,10240,1,0,17.3307
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,16,1,0,0.2718
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,12288,1,0,21.0425
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,32,1,0,0.3277
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,16384,1,0,29.0525
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,64,1,0,0.4253
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,128,1,0,0.5895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,256,1,0,0.9666
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,512,1,0,1.7396
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1024,1,0,3.2656
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1536,1,0,4.8263
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,2048,1,0,6.3997
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,3072,1,0,9.7035
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,4096,1,0,13.0922
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,32768,1,0,64.9646
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,6144,1,0,20.0150
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,0,0.2374
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,8192,1,0,27.1054
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,16,1,0,0.3434
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,32,1,0,0.4227
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,10240,1,0,34.4430
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,64,1,0,0.5927
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,128,1,0,0.9715
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,256,1,0,1.7497
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,12288,1,0,41.9463
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,512,1,0,3.2524
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1024,1,0,6.3255
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,16384,1,0,57.5962
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1536,1,0,9.4495
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,2048,1,0,12.5784
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,3072,1,0,19.9605
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,4096,1,0,26.0103
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,0,0.2554
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,6144,1,0,39.8388
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,16,1,0,0.4260
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,32768,1,0,122.6755
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,8192,1,0,54.0180
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,32,1,0,0.6634
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,64,1,0,0.9692
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,128,1,0,1.7501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,10240,1,0,68.6451
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,256,1,0,3.2467
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,512,1,0,6.2954
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,12288,1,0,83.6286
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1024,1,0,12.4628
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1536,1,0,18.6839
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,2048,1,0,24.9777
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,0,0.2489
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,16,1,0,0.5927
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,16384,1,0,107.7915
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,3072,1,0,38.2703
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,32,1,0,0.9772
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,64,1,0,1.7439
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,128,1,0,3.2373
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,4096,1,0,53.2642
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,256,1,0,6.2656
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,512,1,0,12.4192
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1024,1,0,24.6999
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,6144,1,0,79.3689
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,0,0.2728
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1536,1,0,37.1009
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,16,1,0,0.9764
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,32,1,0,1.7403
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,8192,1,0,102.0596
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,2048,1,0,49.6375
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,64,1,0,3.2358
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,128,1,0,6.2708
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,256,1,0,12.4214
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,0,0.3271
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,3072,1,0,76.2369
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,512,1,0,24.5774
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,16,1,0,1.7431
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,32,1,0,3.2319
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,4096,1,0,95.9140
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,64,1,0,6.2563
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1024,1,0,49.1453
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,128,1,0,12.3460
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,0,0.4216
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,256,1,0,24.5737
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1536,1,0,73.9092
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,16,1,0,3.7678
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,32,1,0,6.2550
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,2048,1,0,91.6890
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,64,1,0,12.3536
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,512,1,0,48.8851
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,0,0.2060
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,128,1,0,24.5045
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,16,1,0,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,32,1,0,0.2384
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,64,1,0,0.2649
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,128,1,0,0.3067
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,256,1,0,0.3988
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,512,1,0,0.5394
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1024,1,0,0.8713
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,256,1,0,48.8547
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1536,1,0,1.2082
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,2048,1,0,1.5553
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1024,1,0,102.6920
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,3072,1,0,2.2911
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,4096,1,0,2.9926
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,6144,1,0,4.5049
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,8192,1,0,6.0913
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,10240,1,0,7.6289
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,12288,1,0,9.2660
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,0,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,512,1,0,90.1952
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,16,1,0,0.2439
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,16384,1,0,12.7920
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,32,1,0,0.2649
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,64,1,0,0.3108
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,128,1,0,0.3974
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,256,1,0,0.5380
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,32768,1,0,29.1848
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1024,1,0,1.5376
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,512,1,0,0.8604
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1536,1,0,2.3622
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,2048,1,0,3.0378
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,3072,1,0,4.3177
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,4096,1,0,5.7739
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,6144,1,0,8.7371
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,8192,1,0,11.9065
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,0,0.2223
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,10240,1,0,15.1981
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,12288,1,0,18.5056
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,16,1,0,0.2493
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,32,1,0,0.3088
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,16384,1,0,25.5204
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,64,1,0,0.3987
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,128,1,0,0.5404
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,256,1,0,0.8625
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,512,1,0,1.5203
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1024,1,0,2.8514
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1536,1,0,4.1722
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,2048,1,0,5.5535
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,3072,1,0,8.7391
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,32768,1,0,58.1580
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,4096,1,0,11.3552
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,6144,1,0,17.4520
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,8192,1,0,23.7373
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,0,0.2306
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,16,1,0,0.3274
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,32,1,0,0.3994
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,10240,1,0,30.1991
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,12288,1,0,36.8267
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,64,1,0,0.5396
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,128,1,0,0.8586
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,256,1,0,1.5218
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,512,1,0,2.8430
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1024,1,0,5.4841
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,16384,1,0,50.8183
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1536,1,0,8.1522
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,2048,1,0,10.8718
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,3072,1,0,16.6714
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,4096,1,0,23.3384
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,0,0.2328
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,6144,1,0,34.7101
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,16,1,0,0.3998
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,32,1,0,0.5399
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,8192,1,0,47.2230
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,32768,1,0,116.1487
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,64,1,0,1.0060
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,128,1,0,1.5210
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,10240,1,0,60.9137
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,256,1,0,2.8118
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,512,1,0,5.4401
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,12288,1,0,73.4535
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1024,1,0,10.7472
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1536,1,0,17.5961
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,2048,1,0,21.5666
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,0,0.2362
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,16384,1,0,101.5927
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,16,1,0,0.5425
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,32,1,0,0.8641
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,3072,1,0,33.1601
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,64,1,0,1.5163
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,4096,1,0,44.9127
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,128,1,0,2.8189
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,256,1,0,5.4586
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,512,1,0,10.6674
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,6144,1,0,70.6767
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1024,1,0,24.2865
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,0,0.2611
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1536,1,0,31.9776
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,16,1,0,0.8634
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,32,1,0,1.5198
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,8192,1,0,94.2460
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,64,1,0,2.8256
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,2048,1,0,42.8451
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,128,1,0,5.4118
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,256,1,0,12.7483
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,0,0.2987
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,3072,1,0,66.0374
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,512,1,0,24.9635
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,16,1,0,1.5201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,32,1,0,2.8152
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,4096,1,0,89.6824
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,64,1,0,5.4081
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1024,1,0,48.0111
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,128,1,0,12.9205
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1536,1,0,63.6527
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,0,0.3962
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,256,1,0,21.1781
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,16,1,0,2.8196
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,32,1,0,5.4006
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,2048,1,0,85.3889
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,512,1,0,42.0304
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,64,1,0,10.6264
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,0,0.1985
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,128,1,0,21.0732
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,32,1,0,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,16,1,0,0.2322
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,64,1,0,0.2462
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,128,1,0,0.2919
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,256,1,0,0.3796
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,512,1,0,0.5182
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1024,1,0,84.3552
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1024,1,0,0.8171
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,256,1,0,49.8905
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1536,1,0,1.1307
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,2048,1,0,1.5138
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,3072,1,0,2.1095
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,4096,1,0,2.7866
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,6144,1,0,4.1925
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,8192,1,0,5.6292
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,12288,1,0,8.6673
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,10240,1,0,7.1002
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,16384,1,0,11.9429
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,0,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,512,1,0,99.1913
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,16,1,0,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,32,1,0,0.2455
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,64,1,0,0.3022
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,128,1,0,0.3844
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,256,1,0,0.5848
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,512,1,0,0.8123
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,32768,1,0,27.4531
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1024,1,0,1.4253
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1536,1,0,2.0511
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,2048,1,0,2.6723
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,3072,1,0,4.0127
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,4096,1,0,5.3664
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,6144,1,0,8.1238
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,8192,1,0,11.0809
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,10240,1,0,14.0926
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,16,1,0,0.2469
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,0,0.2115
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,12288,1,0,17.2071
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,32,1,0,0.3064
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,64,1,0,0.3820
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,16384,1,0,23.8014
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,128,1,0,0.5180
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,256,1,0,0.8087
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,512,1,0,1.4132
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1024,1,0,2.6483
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1536,1,0,3.8734
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,2048,1,0,5.1321
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,3072,1,0,7.7945
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,32768,1,0,54.9474
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,4096,1,0,10.5217
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,6144,1,0,16.1271
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,8192,1,0,22.3682
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,0,0.2220
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,16,1,0,0.2939
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,10240,1,0,28.1009
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,32,1,0,0.3828
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,12288,1,0,34.6481
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,64,1,0,0.5883
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,128,1,0,0.8119
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,256,1,0,1.4099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,512,1,0,2.6293
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,16384,1,0,47.5066
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1024,1,0,5.0387
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1536,1,0,7.5307
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,2048,1,0,10.7703
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,3072,1,0,15.3970
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,4096,1,0,20.8933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,6144,1,0,32.1696
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,0,0.2307
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,16,1,0,0.4103
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,32768,1,0,109.4054
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,8192,1,0,43.9125
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,32,1,0,0.5187
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,64,1,0,0.8125
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,128,1,0,1.4123
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,10240,1,0,55.9339
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,256,1,0,2.6147
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,512,1,0,5.8939
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1024,1,0,9.8967
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1536,1,0,14.8279
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,12288,1,0,69.1589
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,2048,1,0,21.2954
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,0,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,16384,1,0,94.6962
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,16,1,0,0.5217
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,3072,1,0,30.6490
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,4096,1,0,41.5544
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,32,1,0,0.8151
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,64,1,0,1.6673
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,128,1,0,2.6082
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,256,1,0,6.0454
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,512,1,0,9.8310
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,6144,1,0,64.2013
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,0,0.2422
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1024,1,0,19.6077
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1536,1,0,29.4027
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,16,1,0,0.9574
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,8192,1,0,87.5002
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,2048,1,0,39.4739
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,32,1,0,1.4082
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,64,1,0,2.6024
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,128,1,0,4.9910
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,256,1,0,9.8169
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,3072,1,0,63.9692
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,512,1,0,19.4801
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,0,0.2829
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,16,1,0,1.7015
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,4096,1,0,82.9167
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,32,1,0,2.6000
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,64,1,0,4.9895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1024,1,0,44.6345
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,128,1,0,9.7932
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1536,1,0,58.6955
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,256,1,0,19.4201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,0,0.3808
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,16,1,0,2.6063
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,32,1,0,5.0008
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,2048,1,0,78.6428
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,0,0.1994
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,512,1,0,46.3164
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,64,1,0,9.7835
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,128,1,0,19.3844
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,16,1,0,0.2279
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,32,1,0,0.2366
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,64,1,0,0.2371
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,128,1,0,0.2990
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,256,1,0,0.3717
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,512,1,0,0.5062
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,256,1,0,47.2595
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1024,1,0,0.7970
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1024,1,0,88.9752
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1536,1,0,1.0852
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,2048,1,0,1.4642
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,3072,1,0,2.0454
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,4096,1,0,2.6810
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,6144,1,0,4.0214
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,8192,1,0,5.4155
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,0,0.1998
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,10240,1,0,6.8494
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,12288,1,0,8.3290
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,16384,1,0,11.5361
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,16,1,0,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,512,1,0,91.5671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,32,1,0,0.2447
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,64,1,0,0.2900
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,128,1,0,0.3968
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,32768,1,0,26.6704
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,256,1,0,0.5726
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,512,1,0,0.7872
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1024,1,0,1.3737
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1536,1,0,1.9726
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,2048,1,0,2.5673
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,3072,1,0,3.8398
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,4096,1,0,5.1468
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,6144,1,0,7.8335
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,8192,1,0,10.6759
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,10240,1,0,13.5822
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,12288,1,0,16.7747
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,0,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,16,1,0,0.2531
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,32,1,0,0.2980
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,16384,1,0,23.1165
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,64,1,0,0.3728
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,128,1,0,0.5033
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,256,1,0,0.7874
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,512,1,0,1.3611
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1024,1,0,2.5172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1536,1,0,3.7146
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,2048,1,0,4.9253
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,32768,1,0,53.0637
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,3072,1,0,7.4851
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,4096,1,0,10.0888
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,6144,1,0,15.5287
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,8192,1,0,21.5788
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,0,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,16,1,0,0.2818
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,10240,1,0,27.3815
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,32,1,0,0.3755
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,64,1,0,0.5069
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,128,1,0,0.7899
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,256,1,0,1.3549
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,12288,1,0,33.3576
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,512,1,0,2.5035
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1024,1,0,4.8590
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,16384,1,0,45.8037
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1536,1,0,7.9893
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,2048,1,0,9.6332
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,3072,1,0,14.7651
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,4096,1,0,20.0409
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,0,0.2189
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,6144,1,0,31.6468
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,16,1,0,0.4046
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,32,1,0,0.5039
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,8192,1,0,42.1810
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,32768,1,0,106.0127
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,64,1,0,0.7918
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,128,1,0,1.3630
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,10240,1,0,53.8841
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,256,1,0,2.4862
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,512,1,0,4.8255
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,12288,1,0,66.0343
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1024,1,0,9.4862
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1536,1,0,15.7701
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,2048,1,0,20.4519
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,0,0.2340
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,16384,1,0,91.4688
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,3072,1,0,29.3929
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,16,1,0,0.5093
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,32,1,0,0.7933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,64,1,0,1.3580
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,4096,1,0,39.8823
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,128,1,0,2.4962
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,256,1,0,5.8376
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,512,1,0,9.4328
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,6144,1,0,61.6439
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1024,1,0,21.5523
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,0,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1536,1,0,28.2234
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,16,1,0,0.7901
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,32,1,0,1.3603
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,8192,1,0,85.9468
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,64,1,0,2.4943
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,2048,1,0,37.7472
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,128,1,0,4.7867
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,256,1,0,9.4532
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,3072,1,0,58.5305
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,0,0.2942
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,512,1,0,22.1935
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,16,1,0,1.3576
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,4096,1,0,79.6420
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,32,1,0,2.4881
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1024,1,0,37.2460
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,64,1,0,4.7908
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,128,1,0,9.3835
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1536,1,0,56.1643
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,256,1,0,18.6402
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,0,0.4047
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,16,1,0,2.4928
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,32,1,0,4.7773
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,2048,1,0,75.2935
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,64,1,0,9.3869
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,512,1,0,44.6603
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,0,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,128,1,0,18.5449
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,16,1,0,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,32,1,0,0.2284
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,128,1,0,0.2889
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,64,1,0,0.2387
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1024,1,0,74.3307
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,256,1,0,0.3977
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,256,1,0,44.8652
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,512,1,0,0.4993
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1024,1,0,0.7845
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1536,1,0,1.0747
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,2048,1,0,1.4430
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,3072,1,0,2.0008
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,4096,1,0,2.6381
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,6144,1,0,3.9520
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,512,1,0,73.8793
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,8192,1,0,5.2924
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,10240,1,0,6.7175
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,12288,1,0,8.1930
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,0,0.2053
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,16384,1,0,11.3233
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,32,1,0,0.2362
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,16,1,0,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,64,1,0,0.2957
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,128,1,0,0.3742
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,256,1,0,0.5012
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,32768,1,0,26.3191
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,512,1,0,0.7791
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1024,1,0,1.3448
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1536,1,0,2.0888
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,2048,1,0,2.5152
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,3072,1,0,3.7676
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,4096,1,0,5.0477
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,6144,1,0,7.6651
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,0,0.2144
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,16,1,0,0.2398
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,8192,1,0,10.4477
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,12288,1,0,16.2686
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,10240,1,0,13.3273
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,16384,1,0,22.5435
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,32,1,0,0.2838
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,64,1,0,0.3754
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,128,1,0,0.5003
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,256,1,0,0.9011
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,512,1,0,1.3348
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1024,1,0,2.4762
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1536,1,0,3.9844
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,32768,1,0,52.3942
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,2048,1,0,4.8260
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,3072,1,0,7.3125
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,4096,1,0,9.9011
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,6144,1,0,15.2314
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,8192,1,0,20.7539
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,0,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,16,1,0,0.2930
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,10240,1,0,26.5175
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,32,1,0,0.4017
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,12288,1,0,32.3922
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,64,1,0,0.4984
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,128,1,0,0.7768
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,256,1,0,1.3395
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,512,1,0,2.4640
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1024,1,0,4.7342
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,16384,1,0,44.9317
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1536,1,0,7.0294
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,2048,1,0,9.4172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,3072,1,0,15.2260
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,4096,1,0,19.6677
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,6144,1,0,30.2818
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,0,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,16,1,0,0.3753
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,32768,1,0,104.3337
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,8192,1,0,42.1751
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,32,1,0,0.5799
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,64,1,0,0.7775
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,10240,1,0,53.4993
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,128,1,0,1.3395
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,256,1,0,2.4555
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,512,1,0,4.7210
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,12288,1,0,64.6869
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1024,1,0,9.2734
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1536,1,0,13.9079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,2048,1,0,18.6146
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,0,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,16,1,0,0.5009
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,16384,1,0,89.8033
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,3072,1,0,28.7571
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,32,1,0,0.7812
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,64,1,0,1.3357
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,128,1,0,2.4405
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,4096,1,0,40.6070
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,256,1,0,4.7266
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,512,1,0,10.9663
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,6144,1,0,60.3595
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1024,1,0,18.3523
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,0,0.2379
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1536,1,0,30.8092
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,16,1,0,0.7778
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,8192,1,0,82.5539
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,2048,1,0,36.9459
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,32,1,0,1.3355
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,64,1,0,2.9682
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,128,1,0,4.6853
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,3072,1,0,57.2690
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,256,1,0,9.2391
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,512,1,0,18.1919
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,0,0.3058
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,16,1,0,1.6255
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,32,1,0,2.4354
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,4096,1,0,77.9886
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1024,1,0,36.4319
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,64,1,0,4.6818
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,128,1,0,9.1536
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1536,1,0,54.9405
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,0,0.3707
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,256,1,0,18.2900
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,16,1,0,3.0347
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,32,1,0,4.6705
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,2048,1,0,73.7340
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,512,1,0,36.2109
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,64,1,0,9.1798
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,128,1,0,18.1643
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,256,1,0,36.1099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1024,1,0,72.6539
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,0,0.1855
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,32,1,0,0.2281
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,16,1,0,0.2261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,64,1,0,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,128,1,0,0.2796
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1024,1,0,0.7730
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1536,1,0,1.0641
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,2048,1,0,1.3542
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,3072,1,0,1.9891
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,512,1,0,87.4848
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,4096,1,0,2.6035
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,6144,1,0,3.8930
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,256,1,0,0.3951
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,512,1,0,0.4947
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,8192,1,0,5.2569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,10240,1,0,6.6582
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,12288,1,0,8.0914
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,0,0.1957
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,16,1,0,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,32,1,0,0.2399
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,64,1,0,0.2907
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,128,1,0,0.3694
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,256,1,0,0.4967
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,512,1,0,0.7681
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,32768,1,0,25.9902
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1024,1,0,1.4922
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1536,1,0,1.9170
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,2048,1,0,2.4901
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,16384,1,0,11.2236
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,3072,1,0,3.7267
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,4096,1,0,4.9911
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,6144,1,0,7.5590
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,8192,1,0,10.3457
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,10240,1,0,13.2053
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,0,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,12288,1,0,16.1105
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,16,1,0,0.2358
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,32,1,0,0.2711
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,64,1,0,0.3657
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,16384,1,0,22.3245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,128,1,0,0.5667
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,256,1,0,0.7691
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,512,1,0,1.3321
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1024,1,0,2.4553
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1536,1,0,3.5769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,2048,1,0,4.7628
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,3072,1,0,7.2385
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,32768,1,0,51.8249
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,4096,1,0,9.7963
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,6144,1,0,15.3935
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,8192,1,0,20.5778
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,0,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,16,1,0,0.2877
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,10240,1,0,26.2642
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,32,1,0,0.3605
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,12288,1,0,32.0993
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,64,1,0,0.4950
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,128,1,0,0.7719
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,256,1,0,1.3225
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,512,1,0,2.8677
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1024,1,0,4.6856
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,16384,1,0,44.5035
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1536,1,0,6.9544
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,2048,1,0,9.2900
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,3072,1,0,14.3268
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,4096,1,0,20.1910
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,0,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,6144,1,0,29.9519
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,16,1,0,0.3660
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,8192,1,0,40.9469
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,32768,1,0,103.4699
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,32,1,0,0.4973
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,64,1,0,0.9148
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,10240,1,0,52.2450
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,128,1,0,1.5947
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,256,1,0,2.4265
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,512,1,0,4.6611
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,12288,1,0,64.0449
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1024,1,0,9.1589
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1536,1,0,13.7330
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,2048,1,0,18.3756
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,0,0.2251
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,16384,1,0,88.9094
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,16,1,0,0.5738
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,3072,1,0,28.4569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,32,1,0,0.7712
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,64,1,0,1.3249
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,4096,1,0,38.6612
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,128,1,0,2.4163
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,256,1,0,4.6587
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,512,1,0,10.9765
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,6144,1,0,59.7000
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1024,1,0,18.1188
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,0,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1536,1,0,27.2792
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,16,1,0,0.7739
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,8192,1,0,81.6937
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,32,1,0,1.3239
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,2048,1,0,36.5401
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,64,1,0,2.9965
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,128,1,0,4.6331
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,3072,1,0,56.6309
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,256,1,0,9.1214
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,512,1,0,18.0118
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,0,0.2836
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,16,1,0,1.3226
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,4096,1,0,80.1920
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,32,1,0,2.4158
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1024,1,0,35.9329
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,64,1,0,4.6341
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,128,1,0,9.0713
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1536,1,0,54.2752
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,0,0.3634
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,16,1,0,2.4164
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,256,1,0,18.0007
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,32,1,0,4.6273
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,2048,1,0,72.8446
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,64,1,0,9.0666
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,512,1,0,35.7935
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,128,1,0,17.8869
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,1,1,0,0.2371
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,16,1,0,0.2958
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,32,1,0,0.3208
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,64,1,0,0.3766
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,128,1,0,0.4154
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,256,1,0,35.8430
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,256,1,0,0.6053
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1024,1,0,71.8899
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,512,1,0,0.8832
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,1024,1,0,1.5417
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,1536,1,0,2.2126
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,2048,1,0,2.8826
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,3072,1,0,4.3133
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,4096,1,0,5.8762
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,6144,1,0,8.4906
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,512,1,0,71.2794
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,8192,1,0,11.7313
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,10240,1,0,14.7615
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,1,1,0,0.2462
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,12288,1,0,17.8449
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,16,1,0,0.3219
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,32,1,0,0.3746
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,64,1,0,0.4130
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,16384,1,0,24.1034
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,128,1,0,0.5788
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,256,1,0,0.8749
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,512,1,0,1.5209
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,1024,1,0,2.8371
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,1536,1,0,4.1803
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,2048,1,0,5.6749
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,1,32768,1,0,44.4765
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,3072,1,0,8.2042
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,4096,1,0,11.3482
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,6144,1,0,17.0618
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,1,1,0,0.2553
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,8192,1,0,22.9943
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,16,1,0,0.3795
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,10240,1,0,28.9544
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,32,1,0,0.4222
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,64,1,0,0.5704
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,128,1,0,0.8699
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,12288,1,0,35.0127
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,256,1,0,1.5076
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,16384,1,0,40.2792
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,512,1,0,2.8131
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,1024,1,0,5.7175
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,1536,1,0,8.0353
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,2048,1,0,11.0215
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,3072,1,0,16.6544
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,4096,1,0,22.2690
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,2,32768,1,0,87.5893
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,6144,1,0,33.8098
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,1,1,0,0.2691
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,8192,1,0,38.1692
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,16,1,0,0.4164
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,32,1,0,0.5714
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,64,1,0,0.8689
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,10240,1,0,48.3015
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,128,1,0,1.5055
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,256,1,0,2.8084
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,12288,1,0,58.5888
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,512,1,0,5.5646
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,1024,1,0,10.9379
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,1536,1,0,17.0430
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,16384,1,0,79.7781
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,2048,1,0,22.0120
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,3072,1,0,33.5245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,4096,1,0,37.1751
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,1,1,0,0.2801
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,16,1,0,0.5743
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,6144,1,0,55.0835
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,32,1,0,0.8686
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,64,1,0,1.5015
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,128,1,0,2.7892
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,8192,1,0,76.2616
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,4,32768,1,0,171.3044
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,256,1,0,5.5566
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,512,1,0,10.8877
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,10240,1,0,94.9733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,1024,1,0,21.5652
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,12288,1,0,114.7471
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,1536,1,0,33.7166
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,2048,1,0,36.0098
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,1,1,0,0.3086
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,16,1,0,0.8697
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,3072,1,0,56.6126
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,32,1,0,1.5035
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,8,16384,1,0,156.7121
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,64,1,0,2.7942
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,128,1,0,5.5501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,4096,1,0,74.2078
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,256,1,0,10.8750
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,512,1,0,21.5300
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,1024,1,0,35.6284
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,6144,1,0,106.0441
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,1,1,0,0.3584
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,1536,1,0,53.5633
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,16,1,0,1.5718
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,32,1,0,2.7971
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,64,1,0,5.5373
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,2048,1,0,71.7466
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,16,8192,1,0,149.7932
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,128,1,0,12.1124
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,256,1,0,21.4770
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,128,1,1,0,0.4591
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,512,1,0,39.0411
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,128,16,1,0,2.9323
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,3072,1,0,107.3243
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,128,32,1,0,5.5395
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,128,64,1,0,10.8621
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,1024,1,0,71.0736
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,128,128,1,0,21.4599
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,32,4096,1,0,145.0868
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,128,256,1,0,35.5588
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,256,1,1,0,0.7137
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,256,16,1,0,5.8523
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,256,32,1,0,10.8574
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,1536,1,0,104.7061
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,256,64,1,0,21.4297
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,1,1,0,0.2061
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,128,512,1,0,70.7284
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,256,128,1,0,35.4455
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,16,1,0,0.2765
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,32,1,0,0.3018
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,64,2048,1,0,143.1319
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,64,1,0,0.3430
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,128,1,0,0.3749
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,256,1,0,0.5033
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,512,1,0,0.7494
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,1024,1,0,1.3045
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,1536,1,0,1.8613
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,256,256,1,0,70.8991
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,2048,1,0,2.4166
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,3072,1,0,3.5616
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,4096,1,0,4.6887
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,6144,1,0,7.1085
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,8192,1,0,9.4476
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,10240,1,0,11.8897
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,12288,1,0,14.3549
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,128,1024,1,0,150.2437
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,1,1,0,0.2090
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,16,1,0,0.3029
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,16384,1,0,19.5680
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,32,1,0,0.3527
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,64,1,0,0.3904
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,128,1,0,0.4977
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,256,1,0,0.7392
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,512,1,0,1.3315
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,1024,1,0,2.3599
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,1,32768,1,0,42.6160
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,128,256,512,1,0,138.6397
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,1536,1,0,3.5972
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,2048,1,0,4.5164
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,3072,1,0,6.7714
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,4096,1,0,9.2093
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,6144,1,0,13.6544
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,8192,1,0,18.4156
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,1,1,0,0.2169
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,16,1,0,0.3501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,10240,1,0,23.2887
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,32,1,0,0.3803
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,12288,1,0,28.2230
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,64,1,0,0.4959
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,128,1,0,0.7325
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,256,1,0,1.2781
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,16384,1,0,38.4141
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,512,1,0,2.3328
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,1024,1,0,4.4290
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,1536,1,0,6.5833
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,2048,1,0,8.7539
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,3072,1,0,13.1993
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,4096,1,0,17.7663
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,2,32768,1,0,76.6430
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,6144,1,0,26.9366
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,1,1,0,0.2269
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,8192,1,0,36.7297
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,16,1,0,0.3730
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,32,1,0,0.4991
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,10240,1,0,46.4407
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,64,1,0,0.7295
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,128,1,0,1.2712
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,256,1,0,2.3275
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,12288,1,0,55.7944
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,512,1,0,4.4060
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,16384,1,0,68.8730
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,1024,1,0,8.6482
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,1536,1,0,12.8853
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,2048,1,0,17.1815
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,3072,1,0,26.2950
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,4096,1,0,35.9296
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,1,1,0,0.2404
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,6144,1,0,53.7183
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,16,1,0,0.5223
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,32,1,0,0.7305
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,64,1,0,1.2661
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,128,1,0,2.3206
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,4,32768,1,0,152.0168
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,256,1,0,4.4173
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,8192,1,0,65.3789
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,10240,1,0,83.0356
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,512,1,0,8.5759
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,1024,1,0,17.0272
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,12288,1,0,101.2795
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,1536,1,0,25.6051
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,2048,1,0,34.1148
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,1,1,0,0.2493
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,16,1,0,0.7321
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,3072,1,0,52.2052
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,32,1,0,1.2697
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,8,16384,1,0,137.5481
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,64,1,0,2.3175
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,128,1,0,4.3916
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,4096,1,0,63.1797
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,256,1,0,8.6047
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,512,1,0,18.7686
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,6144,1,0,96.3616
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,1,1,0,0.2776
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,1024,1,0,36.7473
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,1536,1,0,50.8508
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,16,1,0,1.2704
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,32,1,0,2.3131
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,64,1,0,4.3755
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,2048,1,0,61.9610
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,16,8192,1,0,130.4651
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,128,1,0,8.5622
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,256,1,0,18.8571
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,1,1,0,0.3306
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,3072,1,0,93.3101
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,512,1,0,33.6545
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,16,1,0,2.3210
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,32,1,0,4.3784
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,64,1,0,9.2860
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,32,4096,1,0,126.0963
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,1024,1,0,65.9679
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,128,1,0,16.9010
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,256,1,1,0,0.4701
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,256,16,1,0,4.3738
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,256,1,0,37.4717
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,1536,1,0,90.4553
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,256,32,1,0,8.5579
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,1,1,0,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,256,64,1,0,16.9162
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,512,1,0,59.7429
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,64,2048,1,0,121.3206
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,16,1,0,0.2558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,32,1,0,0.2858
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,64,1,0,0.3104
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,256,128,1,0,38.1572
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,128,1,0,0.3416
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,256,1,0,0.4441
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,512,1,0,0.6203
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,1024,1,0,1.0468
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,1536,1,0,1.4882
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,256,256,1,0,59.9676
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,2048,1,0,1.9171
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,3072,1,0,2.8209
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,4096,1,0,3.6980
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,6144,1,0,5.6012
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,128,1024,1,0,120.0203
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,8192,1,0,7.4806
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,10240,1,0,9.3825
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,1,1,0,0.1963
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,12288,1,0,11.3794
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,16,1,0,0.2818
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,16384,1,0,15.6245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,32,1,0,0.3245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,64,1,0,0.3351
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,128,1,0,0.4378
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,64,256,512,1,0,119.5230
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,256,1,0,0.6828
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,512,1,0,1.0283
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,1024,1,0,1.8641
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,1536,1,0,2.8650
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,2048,1,0,3.5769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,1,32768,1,0,34.7542
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,3072,1,0,5.2515
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,4096,1,0,7.0259
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,6144,1,0,10.6764
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,8192,1,0,14.4569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,1,1,0,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,10240,1,0,18.4043
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,16,1,0,0.3251
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,12288,1,0,22.2690
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,32,1,0,0.3543
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,64,1,0,0.4380
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,128,1,0,0.6070
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,16384,1,0,30.5061
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,256,1,0,1.0200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,512,1,0,1.8367
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,1024,1,0,3.7968
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,1536,1,0,5.0749
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,2048,1,0,7.1235
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,3072,1,0,10.1968
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,4096,1,0,13.8735
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,2,32768,1,0,68.2008
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,6144,1,0,21.0459
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,8192,1,0,28.4529
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,1,1,0,0.2239
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,16,1,0,0.3371
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,32,1,0,0.4367
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,10240,1,0,36.1316
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,64,1,0,0.6464
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,128,1,0,1.0107
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,12288,1,0,43.9449
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,256,1,0,1.8222
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,512,1,0,3.8799
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,1024,1,0,7.3842
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,16384,1,0,60.3408
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,1536,1,0,10.1927
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,2048,1,0,13.1951
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,3072,1,0,20.3143
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,4096,1,0,27.4086
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,1,1,0,0.2313
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,6144,1,0,41.9023
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,16,1,0,0.4372
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,32,1,0,0.6104
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,4,32768,1,0,128.1693
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,64,1,0,1.0153
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,8192,1,0,57.5344
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,128,1,0,1.8186
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,256,1,0,3.9060
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,10240,1,0,72.1198
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,512,1,0,7.5501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,1024,1,0,13.0097
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,12288,1,0,88.5001
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,1536,1,0,21.1519
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,2048,1,0,26.2350
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,1,1,0,0.2454
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,16,1,0,0.6106
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,8,16384,1,0,113.1389
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,3072,1,0,40.3350
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,32,1,0,1.1520
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,64,1,0,1.9906
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,128,1,0,3.3874
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,4096,1,0,55.1262
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,256,1,0,6.5979
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,512,1,0,14.8759
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,1024,1,0,25.8837
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,1,1,0,0.2678
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,6144,1,0,85.1565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,1536,1,0,38.9899
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,16,1,0,1.1602
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,32,1,0,2.1155
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,16,8192,1,0,106.1448
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,64,1,0,3.3936
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,2048,1,0,53.2228
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,128,1,0,6.5538
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,256,1,0,12.9964
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,128,1,1,0,0.3159
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,3072,1,0,80.5914
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,512,1,0,25.7883
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,128,16,1,0,2.1103
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,128,32,1,0,3.3807
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,32,4096,1,0,101.6010
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,128,64,1,0,7.2774
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,128,128,1,0,12.9466
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,1024,1,0,51.4517
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,256,1,1,0,0.4373
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,128,256,1,0,30.0157
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,256,16,1,0,3.9692
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,1536,1,0,77.6410
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,256,32,1,0,7.9369
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,256,64,1,0,14.4388
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,64,2048,1,0,96.8122
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,1,1,0,0.1810
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,128,512,1,0,51.2876
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,256,128,1,0,25.6951
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,16,1,0,0.2412
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,32,1,0,0.2611
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,64,1,0,0.2984
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,128,1,0,0.3212
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,256,1,0,0.4087
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,512,1,0,0.6206
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,1536,1,0,1.2808
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,1024,1,0,0.9346
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,2048,1,0,1.6600
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,256,256,1,0,51.2228
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,3072,1,0,2.4296
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,4096,1,0,3.2042
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,6144,1,0,4.7964
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,128,1024,1,0,107.2115
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,8192,1,0,6.4664
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,10240,1,0,8.1891
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,12288,1,0,9.8535
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,1,1,0,0.1936
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,16,1,0,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,16384,1,0,13.5753
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,32,1,0,0.3007
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,32,256,512,1,0,95.0740
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,64,1,0,0.3151
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,128,1,0,0.4228
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,256,1,0,0.5591
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,512,1,0,0.8929
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,1024,1,0,1.7547
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,1,32768,1,0,30.7626
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,1536,1,0,2.3093
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,2048,1,0,3.0357
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,3072,1,0,4.5193
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,4096,1,0,6.0476
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,6144,1,0,9.1527
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,1,1,0,0.1959
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,8192,1,0,12.4522
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,16,1,0,0.3008
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,10240,1,0,15.8241
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,32,1,0,0.3183
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,12288,1,0,19.2898
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,64,1,0,0.4061
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,128,1,0,0.5557
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,16384,1,0,26.5420
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,256,1,0,1.0079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,512,1,0,1.5713
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,1024,1,0,2.9447
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,1536,1,0,4.4616
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,2048,1,0,5.7434
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,3072,1,0,8.7089
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,4096,1,0,11.7474
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,2,32768,1,0,60.3828
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,6144,1,0,18.0275
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,8192,1,0,24.4857
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,1,1,0,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,16,1,0,0.3109
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,10240,1,0,31.1829
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,32,1,0,0.4103
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,64,1,0,0.5534
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,12288,1,0,37.9946
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,128,1,0,0.8812
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,256,1,0,1.5704
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,512,1,0,2.9153
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,1024,1,0,5.6380
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,16384,1,0,52.6076
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,1536,1,0,8.3980
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,2048,1,0,11.1768
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,3072,1,0,17.5479
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,4096,1,0,23.4383
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,1,1,0,0.2235
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,16,1,0,0.4209
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,6144,1,0,35.9400
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,32,1,0,0.5539
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,8192,1,0,48.8488
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,64,1,0,0.8819
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,128,1,0,1.5591
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,4,32768,1,0,119.1475
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,256,1,0,2.9034
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,10240,1,0,62.5786
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,512,1,0,5.5920
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,1024,1,0,11.0273
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,12288,1,0,75.9518
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,1536,1,0,17.1895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,2048,1,0,22.2283
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,1,1,0,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,16,1,0,0.5565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,3072,1,0,34.4056
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,8,16384,1,0,104.6406
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,32,1,0,0.8823
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,64,1,0,1.5570
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,128,1,0,2.8995
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,4096,1,0,46.6897
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,256,1,0,5.5959
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,512,1,0,10.9582
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,1024,1,0,21.8784
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,6144,1,0,73.0270
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,1,1,0,0.2601
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,1536,1,0,33.0562
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,16,1,0,0.8821
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,32,1,0,1.5578
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,64,1,0,2.8896
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,2048,1,0,44.3032
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,16,8192,1,0,97.6640
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,128,1,0,5.5766
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,256,1,0,10.9131
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,3072,1,0,68.5122
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,128,1,1,0,0.3017
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,512,1,0,23.5110
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,128,16,1,0,1.5565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,128,32,1,0,2.8896
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,1024,1,0,43.6100
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,128,64,1,0,5.5654
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,32,4096,1,0,93.1116
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,128,128,1,0,10.9172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,256,1,1,0,0.4121
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,128,256,1,0,21.7078
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,1536,1,0,65.7365
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,256,16,1,0,2.8882
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,256,32,1,0,5.5540
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,256,64,1,0,10.9284
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,1,1,0,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,64,2048,1,0,88.3243
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,128,512,1,0,43.3401
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,256,128,1,0,21.6790
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,16,1,0,0.2387
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,32,1,0,0.2570
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,64,1,0,0.2982
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,128,1,0,0.3041
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,256,1,0,0.3964
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,512,1,0,0.5401
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,256,256,1,0,43.5148
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,1024,1,0,0.8508
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,1536,1,0,1.1768
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,2048,1,0,1.5114
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,128,1024,1,0,86.9292
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,3072,1,0,2.2347
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,4096,1,0,2.9569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,6144,1,0,4.4366
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,8192,1,0,5.9493
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,10240,1,0,7.5056
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,12288,1,0,9.1302
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,16,256,512,1,0,86.4982
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,1,1,0,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,16384,1,0,12.5893
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,16,1,0,0.2568
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,32,1,0,0.2936
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,64,1,0,0.3000
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,128,1,0,0.3933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,1,32768,1,0,28.7558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,256,1,0,0.5301
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,512,1,0,0.8373
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,1024,1,0,1.4711
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,1536,1,0,2.3040
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,2048,1,0,2.9299
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,3072,1,0,4.3139
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,4096,1,0,5.7328
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,6144,1,0,8.5926
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,8192,1,0,11.4374
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,1,1,0,0.1998
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,10240,1,0,14.7548
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,16,1,0,0.2953
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,12288,1,0,17.7866
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,32,1,0,0.2981
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,16384,1,0,24.5628
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,64,1,0,0.3920
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,128,1,0,0.5288
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,256,1,0,0.9516
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,512,1,0,1.4499
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,1024,1,0,2.7067
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,1536,1,0,3.9798
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,2048,1,0,5.5960
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,3072,1,0,7.9447
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,2,32768,1,0,56.3005
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,4096,1,0,10.7589
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,6144,1,0,16.5200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,8192,1,0,22.5172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,1,1,0,0.2075
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,16,1,0,0.2936
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,10240,1,0,28.7370
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,32,1,0,0.4236
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,64,1,0,0.5286
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,128,1,0,0.8260
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,12288,1,0,35.0294
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,256,1,0,1.4351
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,512,1,0,2.6725
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,1024,1,0,5.1473
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,16384,1,0,48.4444
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,1536,1,0,8.3966
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,2048,1,0,10.9610
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,3072,1,0,16.0797
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,4096,1,0,21.4260
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,1,1,0,0.2206
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,6144,1,0,33.0084
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,16,1,0,0.3919
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,32,1,0,0.5656
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,8192,1,0,45.5980
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,4,32768,1,0,111.3467
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,64,1,0,0.8225
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,128,1,0,1.4327
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,256,1,0,3.2090
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,10240,1,0,57.5104
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,512,1,0,5.1026
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,1024,1,0,10.0332
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,12288,1,0,70.0381
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,1536,1,0,15.1514
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,2048,1,0,20.2261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,1,1,0,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,16,1,0,0.5327
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,3072,1,0,31.9145
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,32,1,0,0.8227
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,8,16384,1,0,96.7839
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,64,1,0,1.4328
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,4096,1,0,42.7346
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,128,1,0,2.6493
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,256,1,0,5.0687
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,512,1,0,11.8799
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,6144,1,0,67.1685
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,1024,1,0,19.9453
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,1,1,0,0.2509
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,1536,1,0,33.1201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,16,1,0,0.8286
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,32,1,0,1.4303
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,2048,1,0,40.3475
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,16,8192,1,0,89.8031
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,64,1,0,3.2397
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,128,1,0,5.0727
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,256,1,0,10.0167
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,3072,1,0,62.6013
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,128,1,1,0,0.2897
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,512,1,0,19.8342
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,128,16,1,0,1.4332
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,128,32,1,0,2.6434
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,32,4096,1,0,85.0030
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,1024,1,0,39.7080
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,128,64,1,0,5.0740
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,128,128,1,0,9.9338
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,256,1,1,0,0.3970
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,128,256,1,0,19.8550
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,256,16,1,0,2.6354
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,1536,1,0,59.7630
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,256,32,1,0,5.0652
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,64,2048,1,0,80.4342
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,256,64,1,0,9.9531
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,128,512,1,0,39.4216
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,1,1,0,0.1796
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,256,128,1,0,19.7061
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,16,1,0,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,32,1,0,0.2533
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,64,1,0,0.3020
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,128,1,0,0.2934
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,256,256,1,0,39.5551
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,256,1,0,0.3910
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,512,1,0,0.5252
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,1024,1,0,0.8267
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,1536,1,0,1.1357
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,2048,1,0,1.4495
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,128,1024,1,0,78.9461
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,3072,1,0,2.1436
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,4096,1,0,2.8298
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,6144,1,0,4.3015
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,8192,1,0,5.7172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,10240,1,0,7.2208
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,12288,1,0,8.7810
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,8,256,512,1,0,78.5440
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,1,1,0,0.1917
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,16384,1,0,12.1019
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,16,1,0,0.2531
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,32,1,0,0.2975
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,64,1,0,0.2916
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,128,1,0,0.3828
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,512,1,0,0.8100
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,256,1,0,0.5185
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,1,32768,1,0,27.7748
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,1024,1,0,1.5344
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,1536,1,0,2.0340
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,2048,1,0,2.7990
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,3072,1,0,4.1268
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,4096,1,0,5.3069
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,6144,1,0,8.0624
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,8192,1,0,11.0410
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,10240,1,0,14.0195
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,1,1,0,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,12288,1,0,17.2071
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,16,1,0,0.2919
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,16384,1,0,23.5791
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,32,1,0,0.3068
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,64,1,0,0.3797
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,128,1,0,0.5147
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,256,1,0,0.9290
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,512,1,0,1.3872
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,1024,1,0,2.5802
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,1536,1,0,3.7785
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,2048,1,0,5.3517
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,3072,1,0,7.7518
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,2,32768,1,0,54.4365
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,4096,1,0,10.3892
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,6144,1,0,16.1828
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,8192,1,0,21.5236
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,1,1,0,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,16,1,0,0.2918
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,10240,1,0,27.5178
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,32,1,0,0.4144
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,256,1,0,1.3765
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,64,1,0,0.5159
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,12288,1,0,33.6943
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,128,1,0,0.8592
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,512,1,0,2.5379
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,16384,1,0,46.4835
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,1024,1,0,4.9017
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,1536,1,0,7.3090
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,2048,1,0,9.7353
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,3072,1,0,15.0867
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,4096,1,0,20.4478
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,1,1,0,0.2090
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,6144,1,0,31.7186
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,16,1,0,0.4100
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,8192,1,0,42.9721
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,4,32768,1,0,107.6121
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,32,1,0,0.5137
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,64,1,0,0.7995
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,128,1,0,1.3710
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,10240,1,0,55.5525
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,256,1,0,2.5328
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,512,1,0,5.2340
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,1024,1,0,10.9978
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,12288,1,0,67.1291
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,1536,1,0,14.9395
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,2048,1,0,19.9035
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,1,1,0,0.2288
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,16,1,0,0.5163
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,3072,1,0,29.9204
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,8,16384,1,0,92.8912
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,32,1,0,0.7987
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,64,1,0,1.3727
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,4096,1,0,40.7773
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,128,1,0,2.8020
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,256,1,0,4.8670
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,512,1,0,11.5173
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,1024,1,0,18.9207
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,6144,1,0,64.1818
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,1,1,0,0.2510
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,16,1,0,0.7988
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,1536,1,0,28.5945
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,32,1,0,1.3731
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,64,1,0,2.5111
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,16,8192,1,0,85.8695
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,2048,1,0,41.2110
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,128,1,0,4.8269
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,256,1,0,9.5236
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,3072,1,0,59.6858
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,128,1,1,0,0.2815
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,512,1,0,18.7691
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,128,16,1,0,1.3733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,128,32,1,0,2.8298
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,32,4096,1,0,82.1916
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,1024,1,0,37.6670
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,128,64,1,0,4.8292
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,128,128,1,0,9.4443
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,128,256,1,0,18.8382
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,256,1,1,0,0.3893
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,256,16,1,0,2.5182
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,1536,1,0,62.8767
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,256,32,1,0,4.8209
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,256,64,1,0,9.4552
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,128,512,1,0,37.4842
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,1,1,0,0.1726
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,64,2048,1,0,83.0622
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,256,128,1,0,18.7521
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,16,1,0,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,32,1,0,0.2529
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,64,1,0,0.2891
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,128,1,0,0.2991
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,256,1,0,0.3865
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,256,256,1,0,37.4774
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,512,1,0,0.5176
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,128,1024,1,0,75.2463
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,1536,1,0,1.1193
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,1024,1,0,0.8079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,2048,1,0,1.4960
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,3072,1,0,2.0811
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,4096,1,0,2.7541
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,6144,1,0,4.1552
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,8192,1,0,5.5927
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,10240,1,0,7.1178
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,12288,1,0,8.5815
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,1,1,0,0.1808
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,16384,1,0,11.8899
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,16,1,0,0.2550
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,4,256,512,1,0,91.0213
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,32,1,0,0.2877
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,64,1,0,0.2924
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,128,1,0,0.3850
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,1,32768,1,0,27.3169
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,512,1,0,0.9105
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,1024,1,0,1.3810
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,256,1,0,0.5112
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,1536,1,0,2.1191
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,2048,1,0,2.5865
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,3072,1,0,3.8822
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,4096,1,0,5.1793
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,6144,1,0,8.0321
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,8192,1,0,10.7347
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,10240,1,0,13.7014
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,12288,1,0,16.6803
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,1,1,0,0.1936
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,16,1,0,0.2836
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,32,1,0,0.2902
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,64,1,0,0.4112
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,16384,1,0,23.0915
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,128,1,0,0.5088
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,256,1,0,0.8977
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,512,1,0,1.3613
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,1024,1,0,2.6618
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,1536,1,0,3.6954
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,2048,1,0,4.8934
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,3072,1,0,7.7549
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,2,32768,1,0,53.5208
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,4096,1,0,10.0351
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,6144,1,0,15.4410
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,8192,1,0,21.0407
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,1,1,0,0.2030
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,16,1,0,0.2912
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,10240,1,0,26.9031
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,32,1,0,0.3952
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,12288,1,0,33.2474
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,64,1,0,0.5081
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,128,1,0,0.7845
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,256,1,0,1.3515
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,512,1,0,2.6802
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,16384,1,0,45.5399
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,1024,1,0,4.7867
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,1536,1,0,7.1052
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,2048,1,0,9.4573
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,3072,1,0,14.9636
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,4096,1,0,19.9676
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,1,1,0,0.2123
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,6144,1,0,30.7844
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,16,1,0,0.3778
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,32,1,0,0.5073
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,8192,1,0,42.0037
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,64,1,0,0.7829
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,4,32768,1,0,105.4717
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,128,1,0,1.3481
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,10240,1,0,53.6165
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,256,1,0,3.0382
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,1024,1,0,9.3174
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,512,1,0,4.7244
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,12288,1,0,66.4062
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,1536,1,0,14.0703
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,2048,1,0,20.2526
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,16,1,0,0.5082
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,1,1,0,0.2261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,8,16384,1,0,90.9400
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,3072,1,0,29.1852
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,32,1,0,0.7850
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,64,1,0,1.3458
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,128,1,0,2.4558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,4096,1,0,39.7783
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,256,1,0,5.3380
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,512,1,0,9.2190
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,1024,1,0,21.3684
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,6144,1,0,61.4091
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,1,1,0,0.2409
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,1536,1,0,27.8519
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,16,1,0,0.9541
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,16,8192,1,0,83.9380
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,32,1,0,1.3472
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,2048,1,0,37.3720
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,64,1,0,2.4514
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,128,1,0,4.7062
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,3072,1,0,61.5032
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,256,1,0,11.2941
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,128,1,1,0,0.2851
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,512,1,0,18.3350
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,128,16,1,0,1.3460
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,128,32,1,0,3.1460
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,32,4096,1,0,79.2584
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,1024,1,0,36.7078
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,128,64,1,0,4.7130
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,128,128,1,0,9.2196
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,1536,1,0,55.3328
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,256,1,1,0,0.3879
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,256,16,1,0,2.4573
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,128,256,1,0,23.0451
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,256,32,1,0,4.7023
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,64,2048,1,0,80.6680
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,256,64,1,0,9.2114
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,128,512,1,0,43.8533
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,1,1,0,0.1740
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,256,128,1,0,18.2744
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,16,1,0,0.2333
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,32,1,0,0.2466
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,64,1,0,0.2851
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,128,1,0,0.3038
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,256,256,1,0,36.3661
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,256,1,0,0.3855
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,1024,1,0,0.8051
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,512,1,0,0.5127
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,128,1024,1,0,78.4124
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,1536,1,0,1.1723
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,2048,1,0,1.4219
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,3072,1,0,2.0645
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,4096,1,0,2.7370
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,6144,1,0,4.1112
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,8192,1,0,5.5261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,10240,1,0,6.9761
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,2,256,512,1,0,72.7902
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,12288,1,0,8.5021
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,1,1,0,0.1732
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,16384,1,0,11.7369
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,16,1,0,0.2502
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,32,1,0,0.2881
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,64,1,0,0.3077
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,128,1,0,0.3761
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,512,1,0,0.8312
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,256,1,0,0.5829
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,1,32768,1,0,27.1195
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,1024,1,0,1.3579
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,1536,1,0,1.9564
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,2048,1,0,2.7051
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,3072,1,0,3.8340
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,4096,1,0,5.1174
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,6144,1,0,7.7800
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,8192,1,0,10.5947
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,10240,1,0,13.5339
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,1,1,0,0.1872
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,12288,1,0,16.5572
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,16,1,0,0.2773
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,32,1,0,0.2950
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,16384,1,0,23.0066
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,64,1,0,0.4154
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,128,1,0,0.5055
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,256,1,0,0.7823
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,512,1,0,1.3381
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,1024,1,0,2.4740
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,1536,1,0,3.6420
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,2048,1,0,4.8227
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,3072,1,0,7.3406
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,2,32768,1,0,52.8331
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,4096,1,0,10.2349
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,6144,1,0,15.2562
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,8192,1,0,20.7809
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,1,1,0,0.2025
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,16,1,0,0.2924
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,32,1,0,0.3762
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,10240,1,0,26.5841
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,64,1,0,0.5009
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,12288,1,0,32.4703
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,128,1,0,0.7856
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,256,1,0,1.5803
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,512,1,0,2.6890
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,1024,1,0,4.7118
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,16384,1,0,45.3529
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,1536,1,0,7.7572
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,2048,1,0,9.3318
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,3072,1,0,15.2700
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,4096,1,0,19.6959
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,6144,1,0,31.0712
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,1,1,0,0.2116
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,16,1,0,0.3875
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,8192,1,0,41.5117
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,32,1,0,0.5782
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,64,1,0,0.8663
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,4,32768,1,0,104.4847
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,128,1,0,1.3313
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,10240,1,0,52.9669
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,256,1,0,2.9525
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,512,1,0,4.6823
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,1024,1,0,9.1997
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,12288,1,0,64.8667
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,1536,1,0,13.8603
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,2048,1,0,18.5437
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,1,1,0,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,16,1,0,0.5053
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,8,16384,1,0,90.2206
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,3072,1,0,28.8089
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,32,1,0,0.7820
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,64,1,0,1.4730
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,128,1,0,2.4204
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,4096,1,0,40.8647
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,256,1,0,4.6827
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,512,1,0,9.9336
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,6144,1,0,60.6739
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,1024,1,0,18.2120
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,1,1,0,0.2364
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,1536,1,0,27.4861
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,16,1,0,0.7840
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,32,1,0,1.3305
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,16,8192,1,0,82.7978
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,2048,1,0,36.9165
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,64,1,0,2.4198
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,128,1,0,4.6403
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,256,1,0,9.1507
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,512,1,0,18.0667
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,3072,1,0,57.4501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,128,1,1,0,0.2753
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,128,16,1,0,1.3342
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,32,4096,1,0,78.2259
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,128,32,1,0,2.4259
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,1024,1,0,36.2459
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,128,64,1,0,5.9699
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,128,128,1,0,9.0799
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,256,1,1,0,0.3833
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,128,256,1,0,18.1575
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,1536,1,0,54.6931
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,256,16,1,0,3.1269
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,256,32,1,0,4.6419
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,64,2048,1,0,73.3726
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,256,64,1,0,9.0824
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,128,512,1,0,36.0001
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,0,0.2020
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,256,128,1,0,17.9982
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,16,1,0,0.2400
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,32,1,0,0.2566
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,64,1,0,0.2743
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,256,256,1,0,35.9899
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,128,1,0,0.3490
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,256,1,0,0.4789
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,512,1,0,0.7586
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,128,1024,1,0,72.1918
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1024,1,0,1.3250
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1536,1,0,1.9856
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,2048,1,0,2.5063
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,3072,1,0,3.7513
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,4096,1,0,4.9620
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,6144,1,0,7.4634
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,fp8_block,1,256,512,1,0,71.7331
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,8192,1,0,10.0209
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,10240,1,0,12.5773
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,12288,1,0,15.1974
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,0,0.2124
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,16,1,0,0.2539
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,32,1,0,0.2788
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,16384,1,0,20.6874
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,64,1,0,0.3487
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,128,1,0,0.4824
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,256,1,0,0.8231
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,512,1,0,1.3176
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1024,1,0,2.4842
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,32768,1,0,37.6074
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1536,1,0,3.6804
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,2048,1,0,4.8676
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,3072,1,0,7.2823
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,4096,1,0,9.7217
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,6144,1,0,14.6876
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,8192,1,0,19.7779
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,0,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,16,1,0,0.2813
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,32,1,0,0.3616
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,10240,1,0,25.0139
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,128,1,0,0.7563
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,64,1,0,0.4801
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,12288,1,0,30.3078
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,16384,1,0,33.9735
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,256,1,0,1.3198
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,512,1,0,2.4708
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1024,1,0,4.8189
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1536,1,0,7.1484
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,2048,1,0,9.4863
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,3072,1,0,14.3296
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,4096,1,0,19.2566
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,32768,1,0,75.1267
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,6144,1,0,29.2332
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,8192,1,0,32.5792
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,0,0.2308
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,16,1,0,0.3481
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,10240,1,0,40.7424
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,32,1,0,0.4832
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,64,1,0,0.7560
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,128,1,0,1.3201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,12288,1,0,49.5101
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,256,1,0,2.4759
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,512,1,0,4.8012
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1024,1,0,9.4157
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,16384,1,0,67.7661
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1536,1,0,14.8366
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,2048,1,0,18.7817
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,3072,1,0,28.4929
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,4096,1,0,31.0635
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,0,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,16,1,0,0.4828
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,6144,1,0,47.3909
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,32,1,0,0.7590
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,64,1,0,1.3205
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,8192,1,0,64.1645
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,128,1,0,2.4696
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,32768,1,0,147.3131
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,256,1,0,4.8021
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,10240,1,0,81.4428
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,512,1,0,9.4131
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1024,1,0,18.5942
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,12288,1,0,96.9311
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1536,1,0,27.9321
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,2048,1,0,30.0115
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,0,0.2501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,16,1,0,0.7602
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,3072,1,0,45.8505
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,32,1,0,1.3216
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,64,1,0,2.4673
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,16384,1,0,132.8186
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,128,1,0,4.7896
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,4096,1,0,61.8992
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,256,1,0,9.4113
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,512,1,0,18.5746
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,6144,1,0,92.5542
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1024,1,0,32.7683
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,0,0.2739
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1536,1,0,44.6848
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,16,1,0,1.3218
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,32,1,0,2.4652
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,64,1,0,4.7900
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,2048,1,0,59.8476
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,8192,1,0,125.4372
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,128,1,0,9.3747
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,256,1,0,18.6039
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,0,0.3480
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,3072,1,0,89.3586
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,512,1,0,29.6241
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,16,1,0,2.4685
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,32,1,0,4.7806
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,4096,1,0,120.9491
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,64,1,0,9.3553
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1024,1,0,59.3304
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,128,1,0,18.5075
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,0,0.4794
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1536,1,0,87.0421
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,256,1,0,29.6762
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,16,1,0,4.7798
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,32,1,0,9.3629
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,64,1,0,18.5288
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,512,1,0,59.0158
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,0,0.1872
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,2048,1,0,116.6767
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,128,1,0,29.5391
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,16,1,0,0.2163
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,32,1,0,0.2333
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,64,1,0,0.2524
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,128,1,0,0.3106
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,256,1,0,0.4163
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,512,1,0,0.6227
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1024,1,0,1.1040
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,256,1,0,58.9653
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1536,1,0,1.5716
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,2048,1,0,2.0392
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,3072,1,0,3.0223
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1024,1,0,115.5705
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,4096,1,0,4.0047
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,6144,1,0,6.0427
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,8192,1,0,8.0994
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,10240,1,0,10.2125
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,12288,1,0,12.3772
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,0,0.1958
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,16,1,0,0.2291
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,16384,1,0,16.9342
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,512,1,0,115.1867
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,32,1,0,0.2534
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,64,1,0,0.3089
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,128,1,0,0.4172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,256,1,0,0.6219
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,512,1,0,1.0966
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1024,1,0,2.0190
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1536,1,0,2.9459
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,2048,1,0,3.8903
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,32768,1,0,37.3834
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,3072,1,0,5.8540
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,4096,1,0,7.8413
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,6144,1,0,11.8561
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,0,0.1941
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,8192,1,0,16.0671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,16,1,0,0.2544
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,10240,1,0,20.3266
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,12288,1,0,24.6560
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,32,1,0,0.3115
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,64,1,0,0.4157
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,128,1,0,0.6253
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,256,1,0,1.0942
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,16384,1,0,33.7073
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,512,1,0,2.0112
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1024,1,0,3.8392
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1536,1,0,5.7305
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,2048,1,0,7.6091
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,3072,1,0,11.5095
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,32768,1,0,67.3813
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,4096,1,0,15.5047
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,6144,1,0,23.5916
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,8192,1,0,31.9399
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,0,0.2082
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,16,1,0,0.3099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,32,1,0,0.4151
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,10240,1,0,40.4946
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,64,1,0,0.6257
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,128,1,0,1.0940
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,256,1,0,2.0056
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,12288,1,0,49.1817
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,512,1,0,3.8127
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1024,1,0,7.5249
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,16384,1,0,60.0937
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1536,1,0,11.2542
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,2048,1,0,15.0177
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,3072,1,0,22.8704
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,4096,1,0,30.8152
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,0,0.2186
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,16,1,0,0.4182
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,6144,1,0,47.0403
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,32,1,0,0.6282
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,32768,1,0,134.6049
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,8192,1,0,56.4623
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,64,1,0,1.0945
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,128,1,0,2.0040
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,256,1,0,3.8104
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,10240,1,0,71.7636
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,512,1,0,7.5037
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,12288,1,0,87.3074
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1024,1,0,14.8893
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1536,1,0,22.2918
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,2048,1,0,29.7895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,0,0.2270
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,16,1,0,0.6245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,16384,1,0,120.0540
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,32,1,0,1.0955
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,3072,1,0,45.5206
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,64,1,0,2.0007
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,4096,1,0,54.2152
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,128,1,0,3.8139
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,256,1,0,7.5339
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,512,1,0,14.8258
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,6144,1,0,83.0331
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1024,1,0,29.4590
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,0,0.2524
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1536,1,0,44.3606
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,16,1,0,1.0985
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,32,1,0,1.9964
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,64,1,0,3.8131
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,2048,1,0,52.0374
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,8192,1,0,112.8111
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,128,1,0,7.4865
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,256,1,0,14.8361
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,3072,1,0,79.9864
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,0,0.3083
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,512,1,0,29.3506
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,16,1,0,2.0061
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,32,1,0,3.8073
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,4096,1,0,108.2717
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,64,1,0,7.4785
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1024,1,0,51.5257
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,128,1,0,14.7528
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,0,0.4155
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,16,1,0,3.8101
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,256,1,0,29.4376
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1536,1,0,77.5696
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,32,1,0,7.4755
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,64,1,0,14.7646
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,512,1,0,51.2743
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,2048,1,0,103.9334
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,0,0.1698
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,128,1,0,29.3228
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,16,1,0,0.2042
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,32,1,0,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,64,1,0,0.2268
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,128,1,0,0.2753
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,256,1,0,0.3711
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,512,1,0,0.5300
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,256,1,0,51.3414
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1024,1,0,0.8929
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1536,1,0,1.2818
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,2048,1,0,1.6459
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1024,1,0,102.9286
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,3072,1,0,2.4424
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,4096,1,0,3.2162
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,6144,1,0,4.8440
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,8192,1,0,6.5028
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,12288,1,0,10.0198
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,10240,1,0,8.2512
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,0,0.1798
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,16,1,0,0.2161
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,16384,1,0,13.8408
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,512,1,0,102.4255
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,32,1,0,0.2261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,64,1,0,0.2795
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,128,1,0,0.3681
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,256,1,0,0.5275
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,512,1,0,0.8862
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1024,1,0,1.6354
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,32768,1,0,31.2009
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1536,1,0,2.3705
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,2048,1,0,3.0840
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,3072,1,0,4.6563
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,4096,1,0,6.2531
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,6144,1,0,9.5128
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,0,0.1846
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,8192,1,0,12.9319
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,16,1,0,0.2254
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,10240,1,0,16.4698
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,12288,1,0,20.0198
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,32,1,0,0.2793
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,16384,1,0,27.5127
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,128,1,0,0.5291
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,64,1,0,0.3687
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,256,1,0,0.8864
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,512,1,0,1.6250
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1024,1,0,3.0552
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1536,1,0,4.5265
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,2048,1,0,6.0372
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,3072,1,0,9.1712
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,32768,1,0,62.1961
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,4096,1,0,12.3662
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,6144,1,0,18.9752
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,8192,1,0,25.7652
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,0,0.1979
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,16,1,0,0.2764
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,10240,1,0,32.7439
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,32,1,0,0.3702
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,12288,1,0,39.8733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,64,1,0,0.5314
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,128,1,0,0.8854
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,256,1,0,1.6089
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,512,1,0,3.0373
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1024,1,0,5.9471
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,16384,1,0,54.8632
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1536,1,0,8.9099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,2048,1,0,11.9207
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,3072,1,0,18.2122
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,4096,1,0,24.5771
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,0,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,6144,1,0,37.7630
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,16,1,0,0.3690
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,32,1,0,0.5292
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,32768,1,0,116.9401
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,8192,1,0,51.3343
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,64,1,0,0.8874
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,128,1,0,1.6079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,256,1,0,3.0396
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,10240,1,0,65.2008
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,512,1,0,5.9160
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1024,1,0,11.7565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,12288,1,0,79.5738
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1536,1,0,17.6364
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,2048,1,0,23.5365
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,0,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,16,1,0,0.5316
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,16384,1,0,102.3768
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,3072,1,0,36.1933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,32,1,0,0.8836
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,64,1,0,1.6149
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,128,1,0,3.0279
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,4096,1,0,49.0025
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,256,1,0,5.9252
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,512,1,0,11.6655
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1024,1,0,23.2862
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,6144,1,0,75.2721
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,0,0.2231
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1536,1,0,35.0788
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,16,1,0,0.8878
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,32,1,0,1.6135
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,8192,1,0,95.2329
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,64,1,0,3.0223
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,2048,1,0,46.9043
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,128,1,0,5.8909
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,256,1,0,11.7259
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,3072,1,0,72.1714
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,0,0.2759
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,512,1,0,23.1901
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,16,1,0,1.6182
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,4096,1,0,90.5700
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,32,1,0,3.0209
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,64,1,0,5.8957
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1024,1,0,46.3419
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,128,1,0,11.6657
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,0,0.3708
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,256,1,0,23.1884
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1536,1,0,69.7315
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,16,1,0,3.0233
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,32,1,0,5.8923
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,2048,1,0,86.2393
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,64,1,0,11.6598
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,0,0.1733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,512,1,0,46.1219
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,16,1,0,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,128,1,0,23.1072
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,32,1,0,0.2114
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,64,1,0,0.2179
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,128,1,0,0.2617
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,256,1,0,0.3466
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,512,1,0,0.4827
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,256,1,0,45.9701
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1024,1,0,0.7953
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1024,1,0,85.2441
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1536,1,0,1.1196
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,2048,1,0,1.4515
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,3072,1,0,2.1558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,4096,1,0,2.8344
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,6144,1,0,4.2557
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,8192,1,0,5.7186
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,10240,1,0,7.2542
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,512,1,0,84.7355
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,12288,1,0,8.8288
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,16384,1,0,12.2454
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,0,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,16,1,0,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,32,1,0,0.2143
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,64,1,0,0.2628
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,128,1,0,0.3433
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,256,1,0,0.4837
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,512,1,0,0.7926
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1024,1,0,1.4340
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1536,1,0,2.0799
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,2048,1,0,2.7021
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,32768,1,0,28.0827
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,3072,1,0,4.0773
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,4096,1,0,5.4670
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,6144,1,0,8.3274
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,0,0.1816
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,8192,1,0,11.3668
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,10240,1,0,14.4828
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,12288,1,0,17.6531
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,16,1,0,0.2257
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,16384,1,0,24.3841
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,32,1,0,0.2613
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,64,1,0,0.3467
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,128,1,0,0.4834
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,256,1,0,0.7884
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,512,1,0,1.4144
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1024,1,0,2.6714
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1536,1,0,3.9438
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,2048,1,0,5.2472
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,3072,1,0,7.9508
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,32768,1,0,55.9643
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,4096,1,0,10.8100
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,6144,1,0,16.6111
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,0,0.1979
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,8192,1,0,22.6033
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,16,1,0,0.2630
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,32,1,0,0.3476
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,10240,1,0,29.2368
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,64,1,0,0.4839
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,12288,1,0,35.2256
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,128,1,0,0.7879
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,256,1,0,1.4154
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,512,1,0,2.6597
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,16384,1,0,48.6625
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1024,1,0,5.1737
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1536,1,0,7.7197
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,2048,1,0,10.3137
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,3072,1,0,15.8542
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,4096,1,0,21.4996
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,0,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,6144,1,0,33.0746
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,16,1,0,0.3470
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,32768,1,0,111.7676
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,32,1,0,0.4843
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,8192,1,0,45.0830
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,64,1,0,0.7884
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,128,1,0,1.4126
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,10240,1,0,58.3207
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,256,1,0,2.6330
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,512,1,0,5.1447
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,12288,1,0,70.2215
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1024,1,0,10.1733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1536,1,0,15.2960
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,2048,1,0,20.4602
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,0,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,16384,1,0,97.1645
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,16,1,0,0.4859
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,3072,1,0,31.5502
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,32,1,0,0.7910
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,64,1,0,1.4099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,4096,1,0,42.7521
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,128,1,0,2.6445
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,256,1,0,5.1533
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,512,1,0,11.9946
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,6144,1,0,65.9492
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1024,1,0,20.1785
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,0,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1536,1,0,30.3862
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,16,1,0,0.7893
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,32,1,0,1.4113
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,8192,1,0,89.9734
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,2048,1,0,40.6178
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,64,1,0,3.2164
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,128,1,0,5.1131
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,256,1,0,10.1480
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,3072,1,0,62.8266
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,0,0.2605
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,512,1,0,20.0862
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,16,1,0,1.4068
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,32,1,0,2.6328
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,4096,1,0,85.4189
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1024,1,0,40.1204
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,64,1,0,5.1079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,128,1,0,10.0768
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1536,1,0,60.3577
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,0,0.3456
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,16,1,0,2.6417
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,256,1,0,20.0857
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,32,1,0,5.1142
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,2048,1,0,81.1504
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,512,1,0,39.9371
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,64,1,0,10.0647
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,0,0.1709
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,128,1,0,19.9650
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,16,1,0,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,32,1,0,0.2062
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,128,1,0,0.2550
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,64,1,0,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,256,1,0,39.9312
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,256,1,0,0.3293
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,512,1,0,0.4603
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1024,1,0,80.0807
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1024,1,0,0.7488
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1536,1,0,1.0491
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,2048,1,0,1.3430
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,3072,1,0,1.9962
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,4096,1,0,2.6209
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,6144,1,0,4.0431
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,8192,1,0,5.3716
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,10240,1,0,6.7730
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,512,1,0,79.6318
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,12288,1,0,8.2406
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,0,0.1710
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,16384,1,0,11.5018
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,16,1,0,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,32,1,0,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,64,1,0,0.2558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,128,1,0,0.3327
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,256,1,0,0.4584
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,512,1,0,0.7437
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,32768,1,0,26.4826
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1024,1,0,1.4713
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1536,1,0,1.9211
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,2048,1,0,2.5169
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,3072,1,0,3.7982
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,4096,1,0,5.0938
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,6144,1,0,7.7242
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,8192,1,0,10.5600
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,0,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,10240,1,0,13.4908
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,16,1,0,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,12288,1,0,16.4507
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,16384,1,0,22.9682
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,32,1,0,0.2511
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,64,1,0,0.3332
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,128,1,0,0.4632
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,256,1,0,0.8693
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,512,1,0,1.3129
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1024,1,0,2.4774
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1536,1,0,3.6673
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,2048,1,0,4.8443
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,3072,1,0,7.3999
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,32768,1,0,52.8471
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,4096,1,0,10.0005
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,6144,1,0,15.4131
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,8192,1,0,21.0212
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,0,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,16,1,0,0.2567
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,10240,1,0,26.8731
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,32,1,0,0.3614
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,64,1,0,0.4616
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,12288,1,0,33.1944
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,128,1,0,0.7438
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,256,1,0,1.3110
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,512,1,0,2.4581
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,16384,1,0,45.5181
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1024,1,0,4.7871
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1536,1,0,7.1416
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,2048,1,0,9.5251
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,3072,1,0,14.6559
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,4096,1,0,19.8948
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,0,0.1979
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,6144,1,0,30.7383
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,16,1,0,0.3353
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,8192,1,0,41.9252
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,32,1,0,0.4621
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,32768,1,0,105.5281
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,64,1,0,0.7449
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,128,1,0,1.3125
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,10240,1,0,54.4068
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,256,1,0,2.4365
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,512,1,0,4.7523
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,12288,1,0,65.5505
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1024,1,0,9.4041
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1536,1,0,14.0626
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,2048,1,0,18.8811
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,0,0.2044
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,16384,1,0,91.6646
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,3072,1,0,29.1917
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,16,1,0,0.4655
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,32,1,0,0.7457
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,64,1,0,1.3087
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,4096,1,0,39.6257
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,128,1,0,2.4412
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,256,1,0,4.7777
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,512,1,0,11.2177
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,6144,1,0,61.2600
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1024,1,0,18.6035
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,0,0.2084
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1536,1,0,28.0632
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,16,1,0,0.7436
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,32,1,0,1.3137
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,8192,1,0,83.7259
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,2048,1,0,37.4855
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,64,1,0,3.0220
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,128,1,0,4.7274
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,256,1,0,9.3318
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,3072,1,0,58.1456
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,512,1,0,18.4821
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,0,0.2555
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,16,1,0,1.3092
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,32,1,0,2.4407
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,4096,1,0,82.1194
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1024,1,0,37.0283
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,64,1,0,4.7361
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,128,1,0,9.2921
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1536,1,0,55.6779
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,256,1,0,18.5029
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,0,0.3309
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,16,1,0,2.4419
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,32,1,0,4.7284
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,2048,1,0,74.8668
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,512,1,0,36.7826
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,64,1,0,9.2779
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,0,0.1706
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,16,1,0,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,128,1,0,18.3838
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,32,1,0,0.1993
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,64,1,0,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,128,1,0,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,256,1,0,0.3246
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,256,1,0,36.8220
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,512,1,0,0.4502
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1024,1,0,73.8521
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1024,1,0,0.7261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1536,1,0,1.0158
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,2048,1,0,1.3005
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,3072,1,0,1.9845
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,4096,1,0,2.5280
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,6144,1,0,3.8248
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,8192,1,0,5.2095
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,512,1,0,73.3725
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,10240,1,0,6.5348
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,12288,1,0,7.9722
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,0,0.1770
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,16384,1,0,11.0552
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,16,1,0,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,32,1,0,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,64,1,0,0.2530
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,128,1,0,0.3268
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,256,1,0,0.4513
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,32768,1,0,25.7341
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,512,1,0,0.7243
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1024,1,0,1.2733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1536,1,0,1.8488
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,2048,1,0,2.5536
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,3072,1,0,3.6525
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,4096,1,0,4.8930
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,6144,1,0,7.4716
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,8192,1,0,10.2079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,10240,1,0,13.2046
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,0,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,12288,1,0,15.8848
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,16,1,0,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,16384,1,0,22.0265
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,32,1,0,0.2527
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,64,1,0,0.3262
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,128,1,0,0.4497
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,256,1,0,0.7215
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,512,1,0,1.2652
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1024,1,0,2.3665
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1536,1,0,3.5085
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,2048,1,0,4.6703
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,32768,1,0,51.3276
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,3072,1,0,7.1256
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,4096,1,0,10.0115
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,6144,1,0,14.8443
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,0,0.1917
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,8192,1,0,20.2440
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,16,1,0,0.2495
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,10240,1,0,25.8983
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,32,1,0,0.3282
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,12288,1,0,31.6613
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,64,1,0,0.4511
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,128,1,0,0.7189
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,256,1,0,1.5144
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,16384,1,0,43.9841
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,512,1,0,2.3537
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1024,1,0,4.5906
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1536,1,0,6.8669
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,2048,1,0,9.1520
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,3072,1,0,14.0734
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,4096,1,0,19.1334
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,0,0.1916
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,6144,1,0,29.5601
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,16,1,0,0.3577
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,32768,1,0,102.8549
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,8192,1,0,40.3866
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,32,1,0,0.4502
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,64,1,0,0.7245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,10240,1,0,51.6434
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,128,1,0,1.2620
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,256,1,0,2.3580
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,512,1,0,4.5736
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,12288,1,0,63.2468
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1024,1,0,8.9780
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1536,1,0,13.5373
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,2048,1,0,18.1185
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,0,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,16384,1,0,87.8851
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,16,1,0,0.4538
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,3072,1,0,29.6710
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,32,1,0,0.7240
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,64,1,0,1.2574
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,4096,1,0,39.5851
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,128,1,0,2.3384
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,256,1,0,4.5548
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,512,1,0,8.9513
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,6144,1,0,58.9326
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1024,1,0,17.7807
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,0,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,16,1,0,0.7242
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1536,1,0,26.8372
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,8192,1,0,80.6745
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,32,1,0,1.2600
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,2048,1,0,35.9886
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,64,1,0,2.3367
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,128,1,0,4.5407
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,256,1,0,8.9521
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,3072,1,0,55.8915
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,512,1,0,17.7296
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,0,0.2503
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,16,1,0,1.2690
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,32,1,0,2.8777
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,4096,1,0,76.0903
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1024,1,0,35.4031
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,64,1,0,4.5366
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,128,1,0,8.8968
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1536,1,0,53.5322
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,0,0.3245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,256,1,0,17.7124
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,16,1,0,2.3387
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,32,1,0,4.5263
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,2048,1,0,78.2774
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,512,1,0,35.2439
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,64,1,0,8.9117
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,0,0.1618
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,128,1,0,17.6181
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,16,1,0,0.2006
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,32,1,0,0.1991
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,64,1,0,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,128,1,0,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,256,1,0,35.1887
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,256,1,0,0.3204
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1024,1,0,70.7978
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,512,1,0,0.4457
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1024,1,0,0.7947
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1536,1,0,0.9975
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,2048,1,0,1.2769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,3072,1,0,1.8928
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,4096,1,0,2.4700
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,6144,1,0,3.7525
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,512,1,0,70.3108
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,8192,1,0,5.1114
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,10240,1,0,6.4182
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,12288,1,0,7.8096
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,0,0.1687
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,16384,1,0,10.8671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,16,1,0,0.2054
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,32,1,0,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,64,1,0,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,128,1,0,0.3501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,256,1,0,0.4441
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,32768,1,0,25.3257
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,512,1,0,0.7082
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1024,1,0,1.2598
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1536,1,0,1.8163
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,2048,1,0,2.3637
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,3072,1,0,3.7227
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,4096,1,0,4.8005
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,6144,1,0,7.3108
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,8192,1,0,9.9684
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,10240,1,0,12.7847
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,0,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,16,1,0,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,12288,1,0,15.6027
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,32,1,0,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,16384,1,0,21.6671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,64,1,0,0.3204
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,128,1,0,0.4451
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,256,1,0,0.8236
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,512,1,0,1.2449
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1024,1,0,2.3317
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1536,1,0,3.4355
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,2048,1,0,4.5516
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,3072,1,0,6.9535
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,32768,1,0,50.5020
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,4096,1,0,9.4338
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,6144,1,0,14.5595
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,0,0.1918
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,8192,1,0,20.2584
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,16,1,0,0.2522
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,10240,1,0,25.3933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,32,1,0,0.3491
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,12288,1,0,31.1014
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,64,1,0,0.4448
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,128,1,0,0.7082
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,256,1,0,1.2460
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,512,1,0,2.3096
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,16384,1,0,43.1966
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1024,1,0,4.5019
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1536,1,0,6.7073
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,2048,1,0,8.9549
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,3072,1,0,14.5827
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,4096,1,0,18.7386
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,6144,1,0,28.9429
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,0,0.1921
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,16,1,0,0.3270
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,32768,1,0,100.9067
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,8192,1,0,39.6181
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,32,1,0,0.4459
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,64,1,0,0.7098
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,10240,1,0,51.3615
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,128,1,0,1.2448
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,256,1,0,2.3067
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,512,1,0,4.4394
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,12288,1,0,62.0945
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1024,1,0,8.8178
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1536,1,0,13.2339
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,2048,1,0,17.7127
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,0,0.1994
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,16384,1,0,86.2830
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,3072,1,0,27.4482
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,16,1,0,0.4466
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,32,1,0,0.7132
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,64,1,0,1.5049
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,4096,1,0,37.3391
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,128,1,0,2.2911
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,256,1,0,4.4723
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,512,1,0,8.7566
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,6144,1,0,57.7579
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1024,1,0,17.4362
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,0,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1536,1,0,26.2857
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,16,1,0,0.7093
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,8192,1,0,79.1438
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,2048,1,0,35.2526
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,32,1,0,1.2432
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,64,1,0,2.2923
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,128,1,0,5.4700
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,256,1,0,8.6986
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,3072,1,0,54.6726
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,512,1,0,17.3555
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,0,0.2484
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,16,1,0,1.2379
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,4096,1,0,74.5602
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,32,1,0,2.2899
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1024,1,0,34.7106
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,64,1,0,4.4338
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,128,1,0,10.8110
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1536,1,0,52.4013
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,0,0.3227
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,256,1,0,17.2373
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,16,1,0,2.2882
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,32,1,0,4.4284
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,2048,1,0,70.2411
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,512,1,0,34.4579
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,64,1,0,8.7144
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,128,1,0,17.2535
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,0,0.1678
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,16,1,0,0.1924
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,32,1,0,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,64,1,0,0.2093
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,128,1,0,0.2454
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,256,1,0,0.3200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,256,1,0,34.2796
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1024,1,0,69.1695
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,512,1,0,0.4397
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1024,1,0,0.7109
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1536,1,0,0.9892
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,2048,1,0,1.2654
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,3072,1,0,1.9208
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,4096,1,0,2.4472
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,6144,1,0,3.7129
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,8192,1,0,5.0160
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,512,1,0,68.7589
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,10240,1,0,6.3595
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,12288,1,0,7.7457
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,16384,1,0,10.7749
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,0,0.1747
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,16,1,0,0.1960
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,32,1,0,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,64,1,0,0.2468
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,128,1,0,0.3216
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,256,1,0,0.4404
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,32768,1,0,25.1661
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,512,1,0,0.7052
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1024,1,0,1.2455
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1536,1,0,1.7958
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,2048,1,0,2.3316
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,3072,1,0,3.5362
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,4096,1,0,4.7596
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,8192,1,0,10.0498
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,6144,1,0,7.2430
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,0,0.1778
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,10240,1,0,12.6577
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,12288,1,0,15.4689
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,16,1,0,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,16384,1,0,21.4512
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,32,1,0,0.2484
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,64,1,0,0.3204
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,128,1,0,0.4423
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,256,1,0,0.7034
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,512,1,0,1.2326
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1024,1,0,2.3059
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1536,1,0,3.3954
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,32768,1,0,50.1322
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,2048,1,0,4.8547
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,3072,1,0,6.9029
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,4096,1,0,9.3334
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,6144,1,0,14.3877
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,0,0.1890
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,8192,1,0,19.6957
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,16,1,0,0.2488
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,32,1,0,0.3203
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,10240,1,0,25.1919
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,12288,1,0,30.7965
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,64,1,0,0.4416
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,128,1,0,0.7020
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,256,1,0,1.2296
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,512,1,0,2.2944
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,16384,1,0,42.8015
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1024,1,0,4.4512
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1536,1,0,6.6439
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,2048,1,0,8.8522
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,3072,1,0,13.6755
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,4096,1,0,18.5371
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,6144,1,0,28.6775
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,0,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,16,1,0,0.3218
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,32768,1,0,100.1847
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,8192,1,0,39.2184
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,32,1,0,0.4396
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,64,1,0,0.7030
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,128,1,0,1.2278
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,10240,1,0,50.1842
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,256,1,0,2.7436
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,512,1,0,4.4017
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,12288,1,0,61.5182
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1024,1,0,8.7132
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1536,1,0,13.0821
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,2048,1,0,17.5011
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,0,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,3072,1,0,27.1742
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,16384,1,0,85.4965
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,16,1,0,0.4431
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,32,1,0,0.7048
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,4096,1,0,36.9846
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,64,1,0,1.2273
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,128,1,0,2.2718
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,256,1,0,4.4370
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,512,1,0,8.6609
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,6144,1,0,57.1788
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1024,1,0,17.2575
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,0,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,16,1,0,0.7046
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1536,1,0,28.9628
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,32,1,0,1.2278
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,8192,1,0,78.3528
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,2048,1,0,34.8316
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,64,1,0,2.2690
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,128,1,0,4.3824
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,256,1,0,8.6919
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,3072,1,0,54.0691
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,512,1,0,17.1667
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,0,0.2426
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,4096,1,0,73.7650
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,16,1,0,1.4982
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,32,1,0,2.2645
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1024,1,0,34.3284
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,64,1,0,4.3946
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1536,1,0,51.7186
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,128,1,0,8.6002
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,0,0.3200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,256,1,0,17.1370
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,16,1,0,2.2727
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,2048,1,0,69.5080
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,32,1,0,4.3793
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,512,1,0,34.0951
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,64,1,0,10.9751
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,128,1,0,17.0544
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,1,1,0,0.2147
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,16,1,0,0.2477
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,32,1,0,0.2756
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,64,1,0,0.3211
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1024,1,0,68.4122
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,128,1,0,0.3591
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,256,1,0,34.1769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,256,1,0,0.4986
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,512,1,0,0.7961
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,1024,1,0,1.4063
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,1536,1,0,2.0259
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,2048,1,0,2.7181
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,3072,1,0,3.9657
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,4096,1,0,5.4570
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,6144,1,0,7.8842
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,8192,1,0,10.9513
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,10240,1,0,13.7813
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,512,1,0,82.4405
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,12288,1,0,16.6507
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,1,1,0,0.2292
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,16,1,0,0.2732
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,32,1,0,0.3288
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,16384,1,0,22.6307
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,64,1,0,0.3590
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,128,1,0,0.4960
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,256,1,0,0.7867
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,512,1,0,1.3867
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,1024,1,0,2.5959
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,1,32768,1,0,41.5002
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,1536,1,0,3.8493
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,2048,1,0,5.4063
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,3072,1,0,7.6319
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,4096,1,0,10.5551
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,6144,1,0,15.9393
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,8192,1,0,21.4568
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,1,1,0,0.2442
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,16,1,0,0.3255
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,10240,1,0,27.0676
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,32,1,0,0.3557
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,64,1,0,0.4963
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,12288,1,0,32.8180
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,128,1,0,0.7860
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,16384,1,0,37.2533
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,256,1,0,1.4977
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,512,1,0,2.5700
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,1024,1,0,5.2003
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,1536,1,0,7.4290
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,2048,1,0,10.2731
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,3072,1,0,15.5061
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,4096,1,0,20.7670
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,2,32768,1,0,81.7094
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,6144,1,0,31.5298
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,8192,1,0,35.2174
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,1,1,0,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,16,1,0,0.3574
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,32,1,0,0.4941
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,10240,1,0,44.6283
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,64,1,0,0.7805
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,128,1,0,1.3661
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,256,1,0,2.5669
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,12288,1,0,54.1098
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,512,1,0,5.1729
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,1024,1,0,10.1504
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,16384,1,0,73.8914
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,1536,1,0,15.1644
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,2048,1,0,20.1895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,3072,1,0,30.5858
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,4096,1,0,34.1529
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,1,1,0,0.2651
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,6144,1,0,50.5795
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,16,1,0,0.4943
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,32,1,0,0.7824
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,64,1,0,1.3705
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,128,1,0,2.5570
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,8192,1,0,70.4673
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,4,32768,1,0,159.5883
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,256,1,0,5.1615
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,10240,1,0,87.7290
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,512,1,0,10.1095
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,1024,1,0,20.0278
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,12288,1,0,105.9419
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,1536,1,0,29.9865
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,2048,1,0,33.0282
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,1,1,0,0.2873
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,16,1,0,0.7821
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,3072,1,0,50.4952
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,32,1,0,1.5062
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,64,1,0,2.8344
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,8,16384,1,0,144.9771
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,128,1,0,5.1452
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,4096,1,0,68.1992
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,256,1,0,10.0935
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,512,1,0,20.0185
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,6144,1,0,97.2109
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,1024,1,0,32.7047
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,1,1,0,0.3388
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,1536,1,0,49.1096
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,16,1,0,1.3723
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,32,1,0,2.5526
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,64,1,0,5.1440
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,2048,1,0,65.8607
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,16,8192,1,0,137.9059
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,128,1,0,10.0653
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,256,1,0,19.9649
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,128,1,1,0,0.4293
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,512,1,0,32.5348
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,3072,1,0,98.4024
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,128,16,1,0,2.5613
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,128,32,1,0,5.1365
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,128,64,1,0,10.0742
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,1024,1,0,65.2014
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,32,4096,1,0,133.4291
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,128,128,1,0,19.9477
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,128,256,1,0,32.5636
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,256,1,1,0,0.6649
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,256,16,1,0,5.1442
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,1536,1,0,95.8590
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,256,32,1,0,10.0827
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,1,1,0,0.1747
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,256,64,1,0,19.9581
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,128,512,1,0,64.8214
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,64,2048,1,0,128.7891
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,256,128,1,0,32.4476
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,16,1,0,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,32,1,0,0.2548
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,64,1,0,0.2974
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,128,1,0,0.3252
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,256,1,0,0.4413
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,512,1,0,0.6712
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,1024,1,0,1.2080
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,1536,1,0,1.7237
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,2048,1,0,2.2500
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,256,256,1,0,64.9776
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,3072,1,0,3.3395
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,4096,1,0,4.4298
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,6144,1,0,6.6520
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,128,1024,1,0,127.3720
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,8192,1,0,8.9155
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,10240,1,0,11.2342
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,1,1,0,0.1859
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,12288,1,0,13.6108
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,16,1,0,0.2509
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,32,1,0,0.3048
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,16384,1,0,18.5954
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,64,1,0,0.3235
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,128,1,0,0.4355
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,256,1,0,0.7217
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,128,256,512,1,0,127.0538
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,512,1,0,1.1827
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,1024,1,0,2.1981
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,1536,1,0,3.2204
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,1,32768,1,0,40.7735
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,2048,1,0,4.2248
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,3072,1,0,6.3764
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,4096,1,0,8.5328
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,6144,1,0,13.0150
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,8192,1,0,17.4244
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,1,1,0,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,10240,1,0,22.2048
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,16,1,0,0.3060
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,12288,1,0,26.7424
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,32,1,0,0.3200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,64,1,0,0.4348
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,128,1,0,0.6565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,16384,1,0,36.5170
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,256,1,0,1.1766
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,512,1,0,2.1711
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,1024,1,0,4.1638
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,1536,1,0,6.1762
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,2048,1,0,8.2345
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,3072,1,0,12.4374
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,4096,1,0,16.7538
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,2,32768,1,0,72.8381
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,6144,1,0,25.4731
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,8192,1,0,34.7659
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,1,1,0,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,16,1,0,0.3232
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,32,1,0,0.4356
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,10240,1,0,43.6171
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,64,1,0,0.6582
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,128,1,0,1.1745
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,12288,1,0,52.9065
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,256,1,0,2.1591
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,512,1,0,4.1220
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,1024,1,0,8.1342
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,16384,1,0,64.9593
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,1536,1,0,12.1285
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,2048,1,0,16.2144
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,3072,1,0,24.7640
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,4096,1,0,33.3549
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,1,1,0,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,6144,1,0,50.8994
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,16,1,0,0.4333
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,32,1,0,0.6537
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,64,1,0,1.1698
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,8192,1,0,61.4816
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,128,1,0,2.1576
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,4,32768,1,0,145.0331
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,256,1,0,4.0991
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,10240,1,0,78.6601
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,512,1,0,8.0777
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,1024,1,0,17.4103
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,12288,1,0,94.7670
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,1536,1,0,24.1403
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,2048,1,0,32.1925
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,1,1,0,0.2350
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,16,1,0,0.6555
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,3072,1,0,49.2240
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,8,16384,1,0,129.9974
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,64,1,0,2.1581
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,32,1,0,1.1699
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,128,1,0,4.1109
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,4096,1,0,59.2915
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,256,1,0,8.0521
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,512,1,0,15.9729
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,6144,1,0,90.6566
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,1024,1,0,31.8232
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,1,1,0,0.2666
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,1536,1,0,47.9478
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,16,1,0,1.1723
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,32,1,0,2.1545
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,64,1,0,4.1105
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,2048,1,0,56.9214
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,16,8192,1,0,122.8878
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,128,1,0,8.0487
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,256,1,0,15.9771
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,128,1,1,0,0.3145
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,3072,1,0,87.4810
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,512,1,0,31.7591
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,128,16,1,0,2.1527
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,128,32,1,0,4.1020
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,1024,1,0,56.2925
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,128,64,1,0,8.0558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,32,4096,1,0,118.2628
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,128,128,1,0,15.9095
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,256,1,1,0,0.4390
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,128,256,1,0,31.8142
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,256,16,1,0,4.0999
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,1536,1,0,84.7430
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,256,32,1,0,8.0444
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,256,64,1,0,15.9201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,1,1,0,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,128,512,1,0,56.0137
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,64,2048,1,0,113.6759
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,32,1,0,0.2361
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,16,1,0,0.2165
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,64,1,0,0.2788
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,256,128,1,0,31.6255
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,128,1,0,0.2921
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,256,1,0,0.3858
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,512,1,0,0.5580
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,1024,1,0,0.9636
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,1536,1,0,1.3880
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,256,256,1,0,55.8261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,2048,1,0,1.7865
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,3072,1,0,2.6404
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,4096,1,0,3.4878
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,128,1024,1,0,112.3059
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,6144,1,0,5.2519
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,8192,1,0,7.0565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,10240,1,0,8.9188
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,12288,1,0,10.8277
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,16,1,0,0.2363
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,16384,1,0,14.8979
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,1,1,0,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,32,1,0,0.2792
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,64,1,0,0.2871
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,64,256,512,1,0,111.9871
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,128,1,0,0.3846
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,256,1,0,0.5508
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,512,1,0,0.9390
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,1024,1,0,1.7380
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,1,32768,1,0,33.3480
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,1536,1,0,2.5215
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,2048,1,0,3.2937
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,3072,1,0,4.9617
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,4096,1,0,6.6605
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,6144,1,0,10.1295
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,8192,1,0,13.7498
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,1,1,0,0.1921
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,10240,1,0,17.4686
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,16,1,0,0.2777
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,12288,1,0,21.2286
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,32,1,0,0.2892
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,16384,1,0,29.1213
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,64,1,0,0.3817
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,128,1,0,0.5479
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,256,1,0,0.9302
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,512,1,0,1.7079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,1024,1,0,3.2283
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,1536,1,0,4.7876
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,2048,1,0,6.3527
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,3072,1,0,9.6706
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,2,32768,1,0,65.3816
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,4096,1,0,13.0460
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,6144,1,0,19.9498
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,8192,1,0,27.1097
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,16,1,0,0.2875
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,1,1,0,0.1951
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,32,1,0,0.3837
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,10240,1,0,34.4324
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,64,1,0,0.6192
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,12288,1,0,41.8883
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,256,1,0,1.9121
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,128,1,0,0.9239
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,512,1,0,3.2029
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,1024,1,0,6.2564
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,16384,1,0,57.5639
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,1536,1,0,9.3891
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,2048,1,0,12.4854
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,3072,1,0,19.2739
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,4096,1,0,26.0031
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,1,1,0,0.2087
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,6144,1,0,39.8312
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,16,1,0,0.3810
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,32,1,0,0.5461
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,64,1,0,0.9221
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,8192,1,0,54.0906
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,128,1,0,1.6885
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,4,32768,1,0,122.4533
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,256,1,0,3.1953
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,10240,1,0,68.6304
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,512,1,0,6.2164
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,1024,1,0,12.3483
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,12288,1,0,83.7332
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,1536,1,0,18.6348
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,2048,1,0,24.8392
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,1,1,0,0.2227
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,8,16384,1,0,107.7916
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,3072,1,0,38.3018
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,32,1,0,0.9264
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,16,1,0,0.5475
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,64,1,0,1.6901
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,4096,1,0,51.8847
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,128,1,0,3.1708
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,256,1,0,6.2161
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,512,1,0,12.2674
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,6144,1,0,79.5152
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,1024,1,0,24.5245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,1,1,0,0.2469
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,1536,1,0,36.9192
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,16,1,0,0.9285
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,32,1,0,1.6873
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,64,1,0,3.1768
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,16,8192,1,0,100.7804
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,2048,1,0,49.5376
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,128,1,0,6.1861
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,256,1,0,12.3100
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,1,1,0,0.2951
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,3072,1,0,79.0149
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,512,1,0,27.9206
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,16,1,0,1.6891
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,32,1,0,3.1673
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,32,4096,1,0,96.1918
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,1024,1,0,48.8298
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,64,1,0,7.2286
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,128,1,0,12.2427
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,256,1,1,0,0.4126
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,256,16,1,0,3.6997
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,256,1,0,24.4454
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,1536,1,0,73.7016
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,256,32,1,0,6.1787
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,64,2048,1,0,91.4789
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,256,64,1,0,12.2473
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,1,1,0,0.1667
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,512,1,0,48.5906
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,256,128,1,0,28.4550
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,16,1,0,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,32,1,0,0.2321
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,64,1,0,0.2658
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,128,1,0,0.2732
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,256,1,0,0.3581
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,512,1,0,0.5495
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,256,256,1,0,48.7430
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,1536,1,0,1.1967
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,1024,1,0,0.8352
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,2048,1,0,1.5440
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,128,1024,1,0,90.2435
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,3072,1,0,2.3498
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,4096,1,0,3.0262
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,6144,1,0,4.5576
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,8192,1,0,6.1500
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,10240,1,0,7.7543
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,12288,1,0,9.4206
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,1,1,0,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,32,256,512,1,0,89.8053
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,16384,1,0,13.0305
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,16,1,0,0.2306
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,32,1,0,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,64,1,0,0.2718
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,128,1,0,0.3558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,256,1,0,0.4987
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,512,1,0,0.8186
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,1024,1,0,1.4956
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,1536,1,0,2.1842
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,1,32768,1,0,29.6440
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,2048,1,0,2.8517
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,3072,1,0,4.2751
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,4096,1,0,5.7358
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,6144,1,0,8.7147
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,1,1,0,0.1895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,8192,1,0,11.8871
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,16,1,0,0.2664
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,10240,1,0,15.1284
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,12288,1,0,18.4367
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,32,1,0,0.2670
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,64,1,0,0.3561
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,16384,1,0,25.4739
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,128,1,0,0.4961
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,256,1,0,0.8099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,512,1,0,1.4702
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,1024,1,0,2.7729
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,1536,1,0,4.0900
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,2048,1,0,5.4349
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,3072,1,0,8.2758
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,2,32768,1,0,58.0389
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,4096,1,0,11.1942
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,6144,1,0,17.1830
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,8192,1,0,23.3807
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,1,1,0,0.1882
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,16,1,0,0.2693
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,32,1,0,0.3549
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,10240,1,0,29.8275
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,64,1,0,0.4967
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,12288,1,0,36.4236
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,128,1,0,0.8067
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,256,1,0,1.4611
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,512,1,0,2.7412
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,16384,1,0,50.2003
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,1024,1,0,5.3408
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,1536,1,0,7.9622
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,2048,1,0,10.6089
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,3072,1,0,16.4963
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,4096,1,0,22.3261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,1,1,0,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,6144,1,0,34.3016
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,16,1,0,0.3564
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,32,1,0,0.4953
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,64,1,0,0.8072
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,8192,1,0,47.4155
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,4,32768,1,0,114.9446
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,128,1,0,1.4509
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,10240,1,0,59.4922
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,256,1,0,2.7308
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,512,1,0,5.3002
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,12288,1,0,72.6639
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,1024,1,0,10.4565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,1536,1,0,15.8306
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,2048,1,0,21.1645
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,1,1,0,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,16,1,0,0.4979
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,8,16384,1,0,100.9924
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,3072,1,0,32.7518
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,32,1,0,0.8070
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,64,1,0,1.4487
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,4096,1,0,44.5133
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,128,1,0,2.7160
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,256,1,0,5.2954
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,512,1,0,10.3692
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,1024,1,0,20.8356
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,6144,1,0,68.4781
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,1,1,0,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,1536,1,0,31.3724
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,16,1,0,0.8093
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,32,1,0,1.4472
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,64,1,0,2.7068
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,2048,1,0,42.1064
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,16,8192,1,0,93.3113
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,128,1,0,5.2607
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,256,1,0,10.4197
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,3072,1,0,65.2681
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,128,1,1,0,0.2804
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,512,1,0,20.7087
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,128,16,1,0,1.4541
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,128,32,1,0,2.7120
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,128,64,1,0,5.2603
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,32,4096,1,0,88.8632
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,1024,1,0,41.3549
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,128,128,1,0,10.3629
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,1536,1,0,62.4571
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,128,256,1,0,20.7155
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,256,1,1,0,0.3898
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,256,16,1,0,2.7099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,256,32,1,0,5.2468
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,1,1,0,0.1627
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,64,2048,1,0,84.0789
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,128,512,1,0,41.1649
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,256,64,1,0,10.3552
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,256,128,1,0,20.5981
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,16,1,0,0.2082
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,32,1,0,0.2296
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,64,1,0,0.2636
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,128,1,0,0.2631
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,256,1,0,0.3432
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,256,256,1,0,41.2993
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,512,1,0,0.4814
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,1024,1,0,0.7819
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,1536,1,0,1.0891
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,128,1024,1,0,82.7073
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,2048,1,0,1.4155
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,3072,1,0,2.1169
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,4096,1,0,2.7938
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,6144,1,0,4.2081
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,8192,1,0,5.6823
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,10240,1,0,7.1867
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,12288,1,0,8.7402
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,16,256,512,1,0,82.1330
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,16384,1,0,12.1077
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,1,1,0,0.1786
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,16,1,0,0.2244
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,32,1,0,0.2650
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,64,1,0,0.2631
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,128,1,0,0.3433
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,256,1,0,0.4728
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,512,1,0,0.7663
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,1024,1,0,1.3610
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,1536,1,0,2.0015
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,1,32768,1,0,27.7905
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,2048,1,0,2.6099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,3072,1,0,4.0816
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,4096,1,0,5.4405
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,6144,1,0,8.0259
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,8192,1,0,10.9578
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,1,1,0,0.1885
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,10240,1,0,13.9482
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,12288,1,0,17.0291
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,16,1,0,0.2642
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,32,1,0,0.2590
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,16384,1,0,23.5893
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,64,1,0,0.3678
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,128,1,0,0.4721
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,256,1,0,0.7574
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,512,1,0,1.5530
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,1024,1,0,2.5331
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,1536,1,0,3.7499
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,2048,1,0,4.9897
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,3072,1,0,7.5857
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,2,32768,1,0,54.3995
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,4096,1,0,10.2631
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,6144,1,0,15.7584
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,1,1,0,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,16,1,0,0.2630
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,8192,1,0,21.5064
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,10240,1,0,27.4658
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,32,1,0,0.3390
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,12288,1,0,33.6434
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,64,1,0,0.4711
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,128,1,0,0.7563
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,256,1,0,1.3363
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,512,1,0,2.5004
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,16384,1,0,46.5398
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,1024,1,0,4.8589
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,1536,1,0,7.2616
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,2048,1,0,9.7292
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,3072,1,0,15.0887
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,4096,1,0,20.4334
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,1,1,0,0.2054
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,6144,1,0,31.5484
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,16,1,0,0.3417
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,32,1,0,0.4707
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,8192,1,0,43.0267
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,64,1,0,0.7532
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,4,32768,1,0,107.5377
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,128,1,0,1.3287
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,10240,1,0,54.8123
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,256,1,0,2.5045
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,512,1,0,4.8359
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,12288,1,0,67.1880
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,1024,1,0,9.5273
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,1536,1,0,14.4001
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,2048,1,0,19.2964
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,1,1,0,0.2165
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,16,1,0,0.5449
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,8,16384,1,0,92.9563
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,3072,1,0,29.8811
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,32,1,0,0.7558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,64,1,0,1.3305
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,4096,1,0,40.8104
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,128,1,0,2.4809
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,256,1,0,4.7991
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,512,1,0,9.4820
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,6144,1,0,62.9556
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,1024,1,0,18.9556
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,1,1,0,0.2355
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,1536,1,0,28.5952
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,16,1,0,0.7591
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,32,1,0,1.3267
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,2048,1,0,38.4291
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,64,1,0,2.4827
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,16,8192,1,0,85.9403
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,128,1,0,4.7948
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,256,1,0,9.4769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,3072,1,0,62.5098
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,128,1,1,0,0.2733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,512,1,0,18.8277
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,128,16,1,0,1.3338
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,128,32,1,0,2.4749
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,32,4096,1,0,81.1731
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,128,64,1,0,4.8007
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,1024,1,0,37.7481
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,128,128,1,0,9.4381
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,256,1,1,0,0.3773
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,1536,1,0,57.0096
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,128,256,1,0,18.8418
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,256,16,1,0,2.4800
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,256,32,1,0,4.7925
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,64,2048,1,0,76.5091
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,1,1,0,0.1688
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,256,64,1,0,9.4251
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,128,512,1,0,44.6591
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,256,128,1,0,18.7447
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,16,1,0,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,32,1,0,0.2276
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,64,1,0,0.2550
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,128,1,0,0.2610
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,256,1,0,0.3391
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,256,256,1,0,37.5744
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,512,1,0,0.4681
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,1024,1,0,0.7566
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,128,1024,1,0,75.1719
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,1536,1,0,1.0485
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,2048,1,0,1.3661
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,3072,1,0,2.0332
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,4096,1,0,2.6844
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,6144,1,0,4.0333
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,8192,1,0,5.4431
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,8,256,512,1,0,74.7435
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,10240,1,0,6.9073
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,12288,1,0,8.3913
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,16384,1,0,11.6492
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,1,1,0,0.1708
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,16,1,0,0.2261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,32,1,0,0.2634
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,64,1,0,0.2605
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,128,1,0,0.3373
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,256,1,0,0.4624
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,512,1,0,0.7404
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,1024,1,0,1.3086
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,1,32768,1,0,26.8930
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,1536,1,0,1.9079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,2048,1,0,2.4909
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,3072,1,0,3.7725
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,4096,1,0,5.0509
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,6144,1,0,7.6797
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,8192,1,0,10.4960
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,1,1,0,0.1814
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,16,1,0,0.2630
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,10240,1,0,13.4054
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,12288,1,0,16.3459
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,32,1,0,0.2556
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,16384,1,0,22.6706
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,64,1,0,0.3341
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,128,1,0,0.4605
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,256,1,0,0.7348
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,512,1,0,1.2882
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,1024,1,0,2.4200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,1536,1,0,3.5615
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,2048,1,0,4.7638
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,3072,1,0,7.2413
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,2,32768,1,0,52.5612
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,4096,1,0,9.8197
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,6144,1,0,15.1190
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,8192,1,0,20.6373
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,1,1,0,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,16,1,0,0.2597
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,10240,1,0,26.3299
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,32,1,0,0.3360
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,64,1,0,0.4584
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,12288,1,0,32.2129
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,128,1,0,0.7326
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,256,1,0,1.2826
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,512,1,0,2.3871
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,16384,1,0,44.6954
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,1024,1,0,4.6562
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,1536,1,0,6.9424
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,2048,1,0,9.2252
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,3072,1,0,14.4187
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,4096,1,0,19.5366
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,1,1,0,0.2048
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,6144,1,0,30.1651
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,16,1,0,0.3346
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,32,1,0,0.4595
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,8192,1,0,41.1501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,4,32768,1,0,103.9478
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,64,1,0,0.7324
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,128,1,0,1.2784
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,10240,1,0,52.5431
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,256,1,0,2.3792
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,512,1,0,4.6119
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,12288,1,0,64.4707
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,1024,1,0,9.0898
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,1536,1,0,13.7380
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,2048,1,0,18.3578
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,1,1,0,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,16,1,0,0.4614
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,8,16384,1,0,90.0038
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,3072,1,0,28.5509
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,32,1,0,0.7318
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,4096,1,0,38.9798
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,64,1,0,1.2771
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,128,1,0,2.3623
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,256,1,0,4.5634
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,512,1,0,9.0534
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,6144,1,0,60.2002
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,1024,1,0,18.0342
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,1,1,0,0.2342
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,1536,1,0,27.2466
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,16,1,0,0.7338
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,16,8192,1,0,82.3235
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,2048,1,0,36.6132
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,32,1,0,1.2794
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,64,1,0,2.3596
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,128,1,0,4.5728
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,256,1,0,9.0661
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,3072,1,0,56.9917
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,512,1,0,17.8755
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,128,1,1,0,0.2714
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,128,16,1,0,1.2801
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,128,32,1,0,2.3641
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,32,4096,1,0,77.5448
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,1024,1,0,35.9427
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,128,64,1,0,4.5742
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,128,128,1,0,8.9869
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,1536,1,0,54.1406
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,256,1,1,0,0.3715
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,128,256,1,0,17.9305
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,256,16,1,0,2.3647
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,256,32,1,0,4.5691
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,128,512,1,0,35.6588
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,64,2048,1,0,72.9662
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,256,64,1,0,8.9888
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,1,1,0,0.1605
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,256,128,1,0,17.8112
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,16,1,0,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,32,1,0,0.2261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,64,1,0,0.2608
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,128,1,0,0.2566
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,256,256,1,0,35.6839
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,256,1,0,0.3345
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,512,1,0,0.4630
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,1024,1,0,0.7444
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,128,1024,1,0,71.4786
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,1536,1,0,1.0382
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,2048,1,0,1.3279
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,3072,1,0,1.9658
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,4096,1,0,2.6209
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,6144,1,0,3.9577
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,8192,1,0,5.3308
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,4,256,512,1,0,71.1349
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,10240,1,0,6.7372
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,1,1,0,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,12288,1,0,8.2271
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,16384,1,0,11.4385
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,16,1,0,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,32,1,0,0.2615
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,64,1,0,0.2579
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,128,1,0,0.3311
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,512,1,0,0.7253
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,1,32768,1,0,26.4493
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,256,1,0,0.4558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,1024,1,0,1.2829
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,1536,1,0,1.8668
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,2048,1,0,2.4428
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,3072,1,0,3.6903
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,4096,1,0,4.9321
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,6144,1,0,7.5174
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,8192,1,0,10.2440
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,10240,1,0,13.1354
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,1,1,0,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,12288,1,0,16.0201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,16,1,0,0.2612
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,32,1,0,0.2550
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,16384,1,0,22.2031
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,64,1,0,0.3289
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,128,1,0,0.4550
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,256,1,0,0.7231
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,512,1,0,1.2627
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,1024,1,0,2.3749
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,1536,1,0,3.4912
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,2048,1,0,4.6423
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,3072,1,0,7.0485
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,2,32768,1,0,51.6162
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,4096,1,0,9.5880
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,6144,1,0,14.7667
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,8192,1,0,20.1429
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,1,1,0,0.1916
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,16,1,0,0.2572
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,10240,1,0,25.7470
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,32,1,0,0.3319
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,12288,1,0,31.5693
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,64,1,0,0.4528
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,128,1,0,0.7209
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,256,1,0,1.2575
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,512,1,0,2.3343
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,16384,1,0,43.7496
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,1024,1,0,4.5378
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,1536,1,0,6.7639
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,2048,1,0,9.0201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,3072,1,0,14.0692
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,4096,1,0,19.0611
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,6144,1,0,29.4557
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,1,1,0,0.2030
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,16,1,0,0.3307
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,8192,1,0,40.2082
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,32,1,0,0.4538
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,64,1,0,0.7191
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,4,32768,1,0,102.0789
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,10240,1,0,51.4291
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,128,1,0,1.2556
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,256,1,0,2.3165
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,512,1,0,4.5011
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,1024,1,0,8.8511
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,12288,1,0,63.0734
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,1536,1,0,13.3535
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,2048,1,0,17.8998
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,1,1,0,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,8,16384,1,0,87.5087
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,16,1,0,0.4543
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,3072,1,0,27.8752
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,32,1,0,0.7203
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,64,1,0,1.2479
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,4096,1,0,39.5027
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,128,1,0,2.2997
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,256,1,0,4.4959
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,512,1,0,8.8097
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,6144,1,0,58.8204
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,1024,1,0,17.5558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,1,1,0,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,1536,1,0,26.5092
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,16,1,0,0.7211
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,32,1,0,1.2542
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,16,8192,1,0,80.4717
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,2048,1,0,35.5648
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,64,1,0,2.3123
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,128,1,0,4.4568
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,256,1,0,8.8031
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,3072,1,0,55.6770
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,128,1,1,0,0.2660
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,512,1,0,17.4447
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,128,16,1,0,1.2528
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,32,4096,1,0,75.7354
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,128,32,1,0,2.3002
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,128,64,1,0,4.4630
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,1024,1,0,40.9320
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,128,128,1,0,8.7471
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,1536,1,0,52.6764
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,128,256,1,0,17.4813
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,256,1,1,0,0.3671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,256,16,1,0,2.3055
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,256,32,1,0,4.4494
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,64,2048,1,0,71.1057
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,128,512,1,0,34.7330
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,256,64,1,0,8.7250
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,1,1,0,0.1657
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,256,128,1,0,17.3626
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,16,1,0,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,32,1,0,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,64,1,0,0.2488
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,128,1,0,0.2549
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,256,256,1,0,34.7490
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,256,1,0,0.3286
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,512,1,0,0.4595
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,128,1024,1,0,69.7818
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,1024,1,0,0.7389
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,1536,1,0,1.0195
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,2048,1,0,1.3177
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,3072,1,0,1.9515
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,4096,1,0,2.5559
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,6144,1,0,3.9178
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,8192,1,0,5.2865
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,10240,1,0,6.6882
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,2,256,512,1,0,69.4252
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,12288,1,0,8.1818
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,1,1,0,0.1647
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,16384,1,0,11.3239
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,16,1,0,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,32,1,0,0.2603
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,64,1,0,0.2539
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,128,1,0,0.3256
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,256,1,0,0.4526
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,512,1,0,0.7201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,1,32768,1,0,26.1928
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,1024,1,0,1.2739
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,1536,1,0,1.8391
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,2048,1,0,2.5567
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,3072,1,0,3.6349
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,4096,1,0,4.8677
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,6144,1,0,7.4396
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,8192,1,0,10.1541
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,10240,1,0,12.9812
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,1,1,0,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,16,1,0,0.2492
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,12288,1,0,15.8569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,32,1,0,0.2532
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,16384,1,0,21.9843
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,64,1,0,0.3246
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,128,1,0,0.4499
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,256,1,0,0.7159
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,512,1,0,1.2435
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,1024,1,0,2.3440
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,1536,1,0,3.4502
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,2048,1,0,4.5927
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,3072,1,0,6.9822
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,2,32768,1,0,51.1566
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,4096,1,0,9.4655
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,6144,1,0,14.5716
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,8192,1,0,19.9420
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,1,1,0,0.1900
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,16,1,0,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,10240,1,0,25.4698
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,32,1,0,0.3240
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,12288,1,0,31.2206
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,64,1,0,0.4492
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,128,1,0,0.7119
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,256,1,0,1.2374
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,512,1,0,2.3104
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,1024,1,0,4.4568
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,16384,1,0,43.3005
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,1536,1,0,6.6626
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,2048,1,0,8.9190
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,3072,1,0,13.8756
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,4096,1,0,18.8132
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,1,1,0,0.1989
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,6144,1,0,29.1307
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,16,1,0,0.3262
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,8192,1,0,39.7408
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,4,32768,1,0,101.2254
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,32,1,0,0.4482
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,64,1,0,0.7122
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,128,1,0,1.2339
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,10240,1,0,50.8449
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,256,1,0,2.2695
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,12288,1,0,62.3691
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,512,1,0,4.4279
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,1024,1,0,10.2091
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,1536,1,0,13.2115
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,2048,1,0,17.6720
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,8,16384,1,0,86.6501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,1,1,0,0.2102
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,3072,1,0,27.5284
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,16,1,0,0.4515
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,32,1,0,0.7111
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,4096,1,0,37.5097
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,64,1,0,1.5195
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,128,1,0,2.2808
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,256,1,0,4.4426
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,512,1,0,8.6753
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,1024,1,0,17.3404
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,6144,1,0,58.1074
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,1,1,0,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,16,1,0,0.7144
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,1536,1,0,26.1285
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,32,1,0,1.5055
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,16,8192,1,0,79.5427
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,2048,1,0,35.1650
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,64,1,0,2.2864
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,128,1,0,4.3971
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,256,1,0,8.6197
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,3072,1,0,54.8999
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,512,1,0,17.2066
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,128,1,1,0,0.2652
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,128,16,1,0,1.2336
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,32,4096,1,0,74.9446
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,128,32,1,0,2.2799
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,1024,1,0,34.4817
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,128,64,1,0,4.3998
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,128,128,1,0,8.6429
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,1536,1,0,52.0478
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,128,256,1,0,17.1119
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,256,1,1,0,0.3636
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,256,16,1,0,2.2789
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,256,32,1,0,4.4007
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,64,2048,1,0,70.1390
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,128,512,1,0,34.2774
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,256,64,1,0,8.6336
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,256,128,1,0,17.1049
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,256,256,1,0,34.1594
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,128,1024,1,0,68.9197
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,fp8,fp8,nvfp4,1,256,512,1,0,68.3686
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,16,1,0,0.4428
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,1,1,0,0.2501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,32,1,0,0.6030
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,64,1,0,0.9724
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,128,1,0,1.7278
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,1024,1,0,13.5455
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,1536,1,0,18.2366
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,2048,1,0,24.2553
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,10240,1,0,69.0225
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,12288,1,0,84.7049
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,512,1,0,6.2477
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,8,16384,1,0,105.0498
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,3072,1,0,37.8581
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,1,1,0,0.2571
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,16,1,0,0.6121
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,4096,1,0,50.9422
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,32,1,0,1.0973
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,64,1,0,2.0194
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,256,1,0,3.1763
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,256,1,0,6.2630
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,512,1,0,12.0449
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,6144,1,0,79.4219
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,1024,1,0,23.9621
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,1536,1,0,36.0752
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,16,8192,1,0,99.5870
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,16,1,0,0.9801
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,128,1,0,3.1763
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,2048,1,0,48.1778
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,32,1,0,2.0246
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,64,1,0,3.1737
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,128,1,0,6.0937
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,256,1,0,12.0274
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,3072,1,0,80.5958
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,512,1,0,23.7692
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,128,16,1,0,1.7596
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,32,4096,1,0,95.4697
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,1024,1,0,47.6875
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,128,32,1,0,3.1666
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,128,64,1,0,7.3902
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,128,128,1,0,14.0651
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,1536,1,0,71.7761
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,128,256,1,0,28.1146
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,2048,1,0,88.8967
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,256,1,1,0,0.4405
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,256,16,1,0,3.1713
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,128,512,1,0,47.3201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,256,32,1,0,6.0831
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,256,64,1,0,11.9822
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,256,128,1,0,23.7423
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,128,1024,1,0,89.8739
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,256,256,1,0,47.5465
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,256,512,1,0,87.0919
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,16,1,0,0.2860
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,32,1,0,0.2861
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,1,1,0,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,64,1,1,0,0.2842
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,32,128,1,1,0,0.3282
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,256,1,0,0.4144
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,512,1,0,0.6025
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,64,1,0,0.3208
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,1024,1,0,0.8702
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,128,1,0,0.3304
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,4096,1,0,3.0602
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,1536,1,0,1.2206
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,2048,1,0,1.5539
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,3072,1,0,2.6462
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,1,1,0,0.2552
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,16,1,0,0.2936
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,6144,1,0,4.4287
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,32768,1,0,27.6814
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,8192,1,0,6.6109
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,10240,1,0,8.9263
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,32,1,0,0.3292
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,12288,1,0,9.3002
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,64,1,0,0.3317
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,1,16384,1,0,12.6076
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,128,1,0,0.4403
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,256,1,0,0.5497
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,512,1,0,0.8610
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,1024,1,0,1.4999
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,1536,1,0,2.1553
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,2048,1,0,2.7851
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,3072,1,0,4.3163
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,4096,1,0,6.3210
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,6144,1,0,8.5413
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,8192,1,0,12.6516
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,10240,1,0,15.1336
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,1,1,0,0.2167
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,12288,1,0,18.6498
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,32,1,0,0.3298
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,16,1,0,0.3080
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,16384,1,0,24.1847
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,64,1,0,0.4133
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,128,1,0,0.5482
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,256,1,0,0.8442
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,512,1,0,1.4731
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,1024,1,0,2.7245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,1536,1,0,3.9506
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,2048,1,0,5.2435
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,2,32768,1,0,53.9538
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,3072,1,0,8.0735
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,4096,1,0,11.1519
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,6144,1,0,16.9658
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,8192,1,0,22.7757
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,1,1,0,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,16,1,0,0.3282
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,10240,1,0,28.9710
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,32,1,0,0.4124
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,12288,1,0,34.5881
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,64,1,0,0.5455
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,128,1,0,0.8535
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,256,1,0,1.4673
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,16384,1,0,48.7824
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,512,1,0,2.6813
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,1024,1,0,5.1337
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,1536,1,0,7.6090
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,2048,1,0,10.8585
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,3072,1,0,17.4517
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,4096,1,0,23.6705
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,6144,1,0,34.1314
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,1,1,0,0.2354
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,4,32768,1,0,107.0950
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,16,1,0,0.4169
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,8192,1,0,46.5998
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,10240,1,0,56.6406
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,32,1,0,0.5468
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,64,1,0,0.8521
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,128,1,0,1.4579
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,256,1,0,2.6908
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,512,1,0,5.0981
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,12288,1,0,71.3084
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,1024,1,0,10.0027
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,1536,1,0,15.0906
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,2048,1,0,20.6291
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,1,1,0,0.2491
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,8,16384,1,0,96.0405
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,3072,1,0,32.8218
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,16,1,0,0.5476
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,32,1,0,0.8565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,4096,1,0,43.6464
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,64,1,0,1.4576
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,128,1,0,3.1665
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,256,1,0,5.1137
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,512,1,0,9.9587
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,6144,1,0,65.2331
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,1024,1,0,20.3348
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,1,1,0,0.2668
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,16,1,0,0.8454
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,1536,1,0,29.9265
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,16,8192,1,0,91.0878
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,32,1,0,1.4848
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,2048,1,0,40.0730
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,64,1,0,2.6672
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,128,1,0,5.0691
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,256,1,0,9.9936
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,3072,1,0,62.7979
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,128,1,1,0,0.3064
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,512,1,0,19.6982
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,128,16,1,0,1.4587
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,32,4096,1,0,85.2281
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,1024,1,0,40.4415
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,128,32,1,0,3.1863
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,128,64,1,0,5.0716
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,128,128,1,0,9.9021
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,1536,1,0,59.3924
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,256,1,1,0,0.4124
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,128,256,1,0,19.7759
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,256,16,1,0,2.6620
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,256,32,1,0,5.0589
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,64,2048,1,0,80.0043
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,256,64,1,0,9.9180
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,128,512,1,0,39.2746
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,256,128,1,0,23.8208
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,256,256,1,0,39.3277
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,128,1024,1,0,80.4789
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,16,256,512,1,0,78.4481
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,16,1,0,0.2665
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,32,1,0,0.2733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,64,1,0,0.3194
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,1024,1,0,0.8072
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,128,1,0,0.3242
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,2048,1,0,1.3992
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,256,1,0,0.3971
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,1,1,0,0.1733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,512,1,0,0.5269
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,1536,1,0,1.1036
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,4096,1,0,3.1258
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,1,1,0,0.2400
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,16,1,0,0.2710
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,3072,1,0,2.4714
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,32,1,0,0.3065
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,6144,1,0,4.8557
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,8192,1,0,6.2850
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,64,1,0,0.3132
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,128,1,0,0.4213
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,32768,1,0,26.7178
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,10240,1,0,6.8536
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,256,1,0,0.5190
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,512,1,0,0.7928
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,1024,1,0,1.3600
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,12288,1,0,9.0976
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,1,16384,1,0,12.5783
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,1536,1,0,1.9375
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,2048,1,0,2.5216
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,3072,1,0,4.2332
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,4096,1,0,5.6589
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,6144,1,0,8.8371
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,8192,1,0,11.1491
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,1,1,0,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,16,1,0,0.3096
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,12288,1,0,16.2460
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,10240,1,0,13.5448
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,16384,1,0,23.2837
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,32,1,0,0.3066
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,64,1,0,0.3981
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,512,1,0,1.3327
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,1024,1,0,2.4970
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,256,1,0,0.7863
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,128,1,0,0.5176
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,1536,1,0,3.5829
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,2048,1,0,4.7263
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,3072,1,0,7.5287
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,2,32768,1,0,49.8647
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,4096,1,0,10.1404
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,6144,1,0,15.6639
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,1,1,0,0.2200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,8192,1,0,22.2910
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,10240,1,0,25.8691
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,16,1,0,0.3106
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,32,1,0,0.3958
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,12288,1,0,33.7210
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,64,1,0,0.5171
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,128,1,0,0.7820
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,256,1,0,1.3192
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,16384,1,0,45.0069
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,512,1,0,2.4213
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,1024,1,0,4.7272
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,1536,1,0,6.8669
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,2048,1,0,9.1780
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,3072,1,0,14.5019
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,4096,1,0,20.2329
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,1,1,0,0.2410
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,6144,1,0,29.7027
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,4,32768,1,0,98.9043
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,8192,1,0,41.5004
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,16,1,0,0.3918
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,32,1,0,0.5165
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,64,1,0,0.7805
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,128,1,0,1.3190
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,10240,1,0,52.8567
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,256,1,0,2.4136
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,512,1,0,4.5668
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,12288,1,0,62.8149
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,1024,1,0,9.0004
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,1536,1,0,13.5331
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,2048,1,0,18.0774
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,8,16384,1,0,86.7392
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,1,1,0,0.2400
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,3072,1,0,28.9664
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,16,1,0,0.5179
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,32,1,0,0.7842
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,4096,1,0,39.7994
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,64,1,0,1.3200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,128,1,0,2.3998
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,256,1,0,4.6017
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,512,1,0,9.1553
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,6144,1,0,61.5932
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,1024,1,0,17.7564
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,1,1,0,0.2545
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,16,8192,1,0,80.9296
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,1536,1,0,26.8558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,16,1,0,0.7809
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,32,1,0,1.3195
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,2048,1,0,36.0739
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,64,1,0,2.4447
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,256,1,0,8.9336
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,3072,1,0,56.8864
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,128,1,0,4.5673
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,512,1,0,17.6011
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,32,4096,1,0,77.9525
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,128,1,1,0,0.2933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,128,16,1,0,1.3211
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,128,32,1,0,2.3976
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,1024,1,0,35.3976
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,128,64,1,0,4.5517
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,1536,1,0,53.4129
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,128,128,1,0,8.8985
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,256,1,1,0,0.3961
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,256,16,1,0,2.3940
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,128,256,1,0,17.6994
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,64,2048,1,0,73.3372
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,128,512,1,0,35.2355
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,256,32,1,0,4.5555
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,256,64,1,0,8.9162
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,256,128,1,0,17.5910
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,256,256,1,0,35.0262
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,128,1024,1,0,70.5543
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,8,256,512,1,0,69.9799
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,32,1,0,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,16,1,0,0.2591
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,64,1,0,0.3027
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,1024,1,0,0.7810
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,128,1,0,0.3068
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,256,1,0,0.3875
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,512,1,0,0.5101
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,1,1,0,0.1731
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,2048,1,0,1.3280
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,1536,1,0,1.0569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,4096,1,0,2.9040
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,1,1,0,0.2491
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,16,1,0,0.2708
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,3072,1,0,2.4543
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,32,1,0,0.3061
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,6144,1,0,4.4998
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,16384,1,0,12.0731
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,64,1,0,0.3010
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,128,1,0,0.3847
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,256,1,0,0.5050
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,8192,1,0,5.6110
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,32768,1,0,25.8625
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,10240,1,0,8.0362
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,512,1,0,0.7634
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,1024,1,0,1.2953
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,1,12288,1,0,8.1678
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,1536,1,0,1.8342
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,2048,1,0,2.3840
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,3072,1,0,3.5426
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,4096,1,0,5.0114
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,6144,1,0,8.0340
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,8192,1,0,11.7988
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,10240,1,0,12.6535
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,1,1,0,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,12288,1,0,15.5453
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,16,1,0,0.3048
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,32,1,0,0.3013
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,16384,1,0,21.3804
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,64,1,0,0.3881
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,128,1,0,0.5025
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,256,1,0,0.7506
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,512,1,0,1.2963
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,1024,1,0,2.3247
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,1536,1,0,3.3792
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,2048,1,0,4.4733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,2,32768,1,0,49.3929
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,3072,1,0,7.3283
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,4096,1,0,11.9153
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,6144,1,0,14.1340
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,1,1,0,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,16,1,0,0.2984
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,8192,1,0,21.3084
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,10240,1,0,25.7569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,32,1,0,0.3810
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,12288,1,0,30.0926
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,64,1,0,0.5009
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,128,1,0,0.7542
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,256,1,0,1.2825
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,16384,1,0,42.1102
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,512,1,0,2.2932
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,1024,1,0,4.3491
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,1536,1,0,6.5143
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,2048,1,0,8.6648
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,3072,1,0,14.3934
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,4096,1,0,19.0781
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,6144,1,0,29.5298
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,4,32768,1,0,95.5763
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,1,1,0,0.2358
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,8192,1,0,39.9500
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,16,1,0,0.3851
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,32,1,0,0.5017
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,64,1,0,0.7525
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,10240,1,0,50.4879
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,128,1,0,1.2572
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,256,1,0,2.2677
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,512,1,0,4.4198
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,12288,1,0,61.4838
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,1024,1,0,8.4964
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,1536,1,0,12.8164
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,2048,1,0,17.0686
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,8,16384,1,0,83.1840
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,1,1,0,0.2392
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,3072,1,0,26.7047
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,16,1,0,0.5022
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,32,1,0,0.7472
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,64,1,0,1.2570
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,4096,1,0,37.5371
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,128,1,0,2.2608
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,256,1,0,4.3482
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,512,1,0,8.4473
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,6144,1,0,57.3468
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,1024,1,0,16.7468
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,1,1,0,0.2552
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,16,8192,1,0,79.1267
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,1536,1,0,25.3262
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,16,1,0,0.7629
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,32,1,0,1.2527
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,2048,1,0,34.0191
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,64,1,0,2.2636
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,128,1,0,4.2981
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,3072,1,0,54.0429
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,256,1,0,8.4631
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,512,1,0,17.0401
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,32,4096,1,0,74.8707
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,128,1,1,0,0.2877
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,128,16,1,0,1.2597
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,128,32,1,0,2.2630
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,1024,1,0,33.2996
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,128,64,1,0,4.3970
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,1536,1,0,50.3212
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,128,128,1,0,8.4010
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,256,1,1,0,0.3839
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,128,256,1,0,16.6876
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,256,16,1,0,2.3100
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,64,2048,1,0,67.6087
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,128,512,1,0,33.1486
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,256,32,1,0,4.2968
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,256,64,1,0,8.3979
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,256,128,1,0,16.5690
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,256,256,1,0,33.3277
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,128,1024,1,0,78.2656
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,4,256,512,1,0,67.5561
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,1,1,0,0.1779
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,16,1,0,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,32,1,0,0.2669
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,64,1,0,0.2943
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,1024,1,0,0.7721
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,128,1,0,0.2988
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,256,1,0,0.3772
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,512,1,0,0.5036
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,1536,1,0,1.0362
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,2048,1,0,1.3005
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,4096,1,0,2.7506
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,1,1,0,0.2273
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,16,1,0,0.2606
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,3072,1,0,2.0120
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,6144,1,0,4.2503
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,32,1,0,0.2898
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,8192,1,0,5.0195
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,64,1,0,0.2967
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,16384,1,0,11.2528
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,128,1,0,0.3788
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,256,1,0,0.4990
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,512,1,0,0.7501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,10240,1,0,7.7153
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,32768,1,0,24.6590
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,1024,1,0,1.2629
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,1536,1,0,1.7909
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,1,12288,1,0,7.7959
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,2048,1,0,2.3276
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,3072,1,0,3.9723
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,4096,1,0,5.6834
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,6144,1,0,7.1859
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,1,1,0,0.1907
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,8192,1,0,10.3228
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,10240,1,0,13.4299
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,16,1,0,0.2999
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,12288,1,0,15.7833
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,32,1,0,0.3033
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,16384,1,0,21.5823
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,64,1,0,0.3816
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,128,1,0,0.4961
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,512,1,0,1.2468
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,256,1,0,0.7427
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,1024,1,0,2.2455
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,1536,1,0,3.2809
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,2048,1,0,4.6675
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,2,32768,1,0,47.4513
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,3072,1,0,7.8480
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,4096,1,0,10.4130
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,6144,1,0,13.9820
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,8192,1,0,19.8456
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,1,1,0,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,16,1,0,0.2978
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,10240,1,0,25.2214
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,12288,1,0,29.3338
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,32,1,0,0.3798
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,64,1,0,0.4949
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,256,1,0,1.4564
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,128,1,0,0.7451
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,16384,1,0,41.4921
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,512,1,0,2.2128
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,1024,1,0,4.2525
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,1536,1,0,6.3045
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,2048,1,0,8.3927
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,3072,1,0,14.7550
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,4096,1,0,18.6213
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,4,32768,1,0,93.7793
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,6144,1,0,28.6903
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,1,1,0,0.2375
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,8192,1,0,38.7005
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,16,1,0,0.3807
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,32,1,0,0.4931
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,10240,1,0,49.0804
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,64,1,0,0.7389
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,128,1,0,1.2267
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,256,1,0,2.2468
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,512,1,0,4.1738
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,12288,1,0,59.1287
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,1024,1,0,8.2393
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,1536,1,0,12.4427
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,2048,1,0,16.6245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,8,16384,1,0,82.9797
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,3072,1,0,27.6935
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,1,1,0,0.2325
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,16,1,0,0.4957
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,32,1,0,0.7393
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,4096,1,0,38.1377
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,64,1,0,1.2256
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,128,1,0,2.2026
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,256,1,0,4.2084
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,6144,1,0,57.0915
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,512,1,0,8.1973
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,1024,1,0,16.2954
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,16,8192,1,0,77.5160
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,1,1,0,0.2450
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,1536,1,0,25.1244
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,16,1,0,0.8737
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,2048,1,0,32.9813
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,32,1,0,1.2289
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,64,1,0,2.1939
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,128,1,0,4.1703
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,256,1,0,8.2039
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,3072,1,0,56.1740
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,512,1,0,16.5368
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,128,1,1,0,0.2874
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,32,4096,1,0,72.4422
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,128,16,1,0,1.4931
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,128,32,1,0,2.1995
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,1024,1,0,32.3522
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,128,64,1,0,4.2672
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,128,128,1,0,8.1440
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,1536,1,0,48.8029
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,256,1,1,0,0.3841
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,256,16,1,0,2.2470
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,128,256,1,0,16.1824
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,256,32,1,0,4.1658
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,64,2048,1,0,65.4664
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,128,512,1,0,32.2445
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,256,64,1,0,8.1549
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,256,128,1,0,16.0847
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,256,256,1,0,32.8099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,128,1024,1,0,75.6184
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,2,256,512,1,0,64.0295
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,16,1,0,0.2517
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,64,1,0,0.2979
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,32,1,0,0.2627
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,1024,1,0,0.7586
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,128,1,0,0.2946
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,256,1,0,0.3852
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,512,1,0,0.5003
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,1,1,0,0.1768
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,1536,1,0,1.0147
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,4096,1,0,2.9943
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,3072,1,0,2.1796
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,2048,1,0,1.2811
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,6144,1,0,3.8636
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,1,1,0,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,16,1,0,0.2630
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,32,1,0,0.2979
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,8192,1,0,5.1923
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,64,1,0,0.2903
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,10240,1,0,6.3301
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,128,1,0,0.3818
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,12288,1,0,9.3935
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,256,1,0,0.4923
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,512,1,0,0.7482
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,1024,1,0,1.3808
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,1536,1,0,1.7605
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,2048,1,0,2.2980
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,3072,1,0,4.2812
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,4096,1,0,4.7898
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,16384,1,0,10.8378
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,6144,1,0,8.0729
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,8192,1,0,9.9678
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,10240,1,0,12.9432
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,12288,1,0,15.8511
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,1,32768,1,0,25.4644
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,1,1,0,0.1916
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,16,1,0,0.2877
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,32,1,0,0.2906
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,16384,1,0,20.4324
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,64,1,0,0.3769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,128,1,0,0.4881
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,256,1,0,0.7354
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,512,1,0,1.2168
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,1024,1,0,2.2625
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,1536,1,0,3.2361
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,2,32768,1,0,46.5235
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,2048,1,0,4.3022
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,3072,1,0,7.8350
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,4096,1,0,10.2232
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,6144,1,0,14.1985
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,8192,1,0,19.0323
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,1,1,0,0.2147
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,16,1,0,0.2910
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,10240,1,0,24.9865
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,32,1,0,0.3803
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,12288,1,0,31.4847
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,64,1,0,0.4913
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,128,1,0,0.7269
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,16384,1,0,41.3090
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,256,1,0,1.2178
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,512,1,0,2.2286
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,1024,1,0,4.1844
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,1536,1,0,6.2117
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,2048,1,0,8.2483
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,3072,1,0,13.9517
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,4096,1,0,17.9978
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,4,32768,1,0,92.0728
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,6144,1,0,26.9653
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,1,1,0,0.2321
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,8192,1,0,39.8125
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,16,1,0,0.3779
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,32,1,0,0.4895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,64,1,0,0.7332
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,128,1,0,1.2113
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,10240,1,0,49.6145
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,256,1,0,2.1839
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,512,1,0,4.2262
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,12288,1,0,59.2220
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,1024,1,0,8.1323
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,1536,1,0,12.2405
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,2048,1,0,16.3523
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,8,16384,1,0,82.6637
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,3072,1,0,25.6136
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,1,1,0,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,16,1,0,0.4893
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,32,1,0,0.7341
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,4096,1,0,37.0032
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,64,1,0,1.2146
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,128,1,0,2.7294
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,256,1,0,4.1048
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,512,1,0,8.0679
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,6144,1,0,57.5669
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,1024,1,0,16.0440
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,1,1,0,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,16,1,0,0.7409
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,1536,1,0,24.1710
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,16,8192,1,0,73.9276
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,32,1,0,1.2126
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,2048,1,0,32.4789
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,64,1,0,2.1678
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,128,1,0,4.1076
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,3072,1,0,52.6823
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,256,1,0,8.2378
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,512,1,0,15.9210
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,1,1,0,0.2869
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,16,1,0,1.5015
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,32,1,0,2.2143
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,1024,1,0,31.8686
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,32,4096,1,0,71.0253
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,64,1,0,4.1092
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,128,1,0,8.0273
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,1536,1,0,48.0790
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,256,1,1,0,0.3756
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,256,1,0,15.9380
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,256,16,1,0,2.1717
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,64,2048,1,0,64.6312
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,256,32,1,0,4.1035
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,512,1,0,32.3566
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,256,64,1,0,8.0049
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,0,0.2349
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,16,1,0,0.2876
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,256,128,1,0,20.4321
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,32,1,0,0.2778
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,64,1,0,0.3023
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,128,1024,1,0,63.4079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,256,256,1,0,31.4639
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,128,1,0,0.3699
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,256,1,0,0.5006
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,512,1,0,0.7762
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1024,1,0,1.3438
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1536,1,0,1.9079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,2048,1,0,2.5031
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,3072,1,0,3.7218
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,fp8_block,1,256,512,1,0,63.0727
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,4096,1,0,4.9579
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,6144,1,0,7.3987
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,8192,1,0,9.8885
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,10240,1,0,12.4299
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,12288,1,0,15.0456
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,0,0.2451
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,16,1,0,0.2819
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,32,1,0,0.3020
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,16384,1,0,20.4667
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,64,1,0,0.3699
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,128,1,0,0.5001
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,256,1,0,0.7741
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,512,1,0,1.3499
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1024,1,0,2.4859
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1536,1,0,3.6691
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,32768,1,0,36.9183
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,2048,1,0,4.8521
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,3072,1,0,7.2216
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,4096,1,0,9.6668
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,6144,1,0,14.6130
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,0,0.2476
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,8192,1,0,19.6668
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,16,1,0,0.3028
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,10240,1,0,24.7820
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,12288,1,0,29.9819
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,32,1,0,0.3667
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,64,1,0,0.4999
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,128,1,0,0.7774
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,16384,1,0,33.5189
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,256,1,0,1.3483
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,512,1,0,2.4779
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1024,1,0,4.8069
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1536,1,0,7.4348
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,2048,1,0,9.4302
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,3072,1,0,14.2816
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,32768,1,0,73.4993
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,4096,1,0,19.1388
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,6144,1,0,29.0741
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,0,0.2565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,8192,1,0,31.8804
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,16,1,0,0.3670
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,10240,1,0,40.3381
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,32,1,0,0.5023
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,64,1,0,0.7770
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,128,1,0,1.3480
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,12288,1,0,48.9834
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,256,1,0,2.4702
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,512,1,0,4.7949
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1024,1,0,9.3934
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,16384,1,0,66.8747
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1536,1,0,14.0632
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,2048,1,0,18.6929
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,3072,1,0,28.3787
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,4096,1,0,30.8087
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,0,0.2716
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,6144,1,0,47.0187
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,16,1,0,0.5007
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,32,1,0,0.7781
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,8192,1,0,63.6326
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,64,1,0,1.3329
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,32768,1,0,146.8943
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,128,1,0,2.4698
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,256,1,0,4.8048
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,10240,1,0,80.5592
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,512,1,0,9.3375
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,12288,1,0,97.9567
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1024,1,0,18.5643
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1536,1,0,27.7753
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,2048,1,0,29.8251
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,0,0.2794
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,16,1,0,0.7764
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,16384,1,0,133.6800
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,32,1,0,1.3358
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,3072,1,0,45.6557
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,64,1,0,2.4707
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,4096,1,0,61.4998
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,128,1,0,4.7776
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,256,1,0,9.3332
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,512,1,0,18.5150
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1024,1,0,29.5652
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,6144,1,0,94.0005
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,0,0.2959
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1536,1,0,44.5138
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,16,1,0,1.3423
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,32,1,0,2.4612
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,64,1,0,4.7862
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,2048,1,0,59.4522
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,8192,1,0,127.4218
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,128,1,0,9.3366
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,256,1,0,18.4522
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,0,0.3680
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,3072,1,0,91.0912
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,512,1,0,29.4950
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,16,1,0,2.4594
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,32,1,0,4.7866
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,64,1,0,9.3206
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,4096,1,0,123.1252
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1024,1,0,59.0317
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,128,1,0,18.4704
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,0,0.4979
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,256,1,0,29.4394
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1536,1,0,88.8165
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,16,1,0,4.7845
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,32,1,0,9.3297
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,512,1,0,58.7951
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,64,1,0,18.4636
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,2048,1,0,119.1208
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,0,0.2023
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,128,1,0,29.4179
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,16,1,0,0.2627
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,32,1,0,0.2527
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,64,1,0,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,128,1,0,0.3216
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,256,1,0,0.4200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,512,1,0,0.6252
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1024,1,0,1.0858
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,256,1,0,58.9866
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1536,1,0,1.5276
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,2048,1,0,1.9836
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,3072,1,0,2.9092
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1024,1,0,118.1591
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,4096,1,0,3.8552
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,6144,1,0,5.7824
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,8192,1,0,7.7537
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,10240,1,0,9.7801
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,12288,1,0,11.8553
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,0,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,16,1,0,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,16384,1,0,16.1917
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,32,1,0,0.2716
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,512,1,0,117.7585
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,64,1,0,0.3183
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,128,1,0,0.4216
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,256,1,0,0.6245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,32768,1,0,35.7104
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,512,1,0,1.0823
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1024,1,0,1.9641
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1536,1,0,2.8457
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,2048,1,0,3.7401
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,3072,1,0,5.7534
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,4096,1,0,7.5440
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,6144,1,0,11.3835
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,8192,1,0,15.4103
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,0,0.2262
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,10240,1,0,19.4595
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,16,1,0,0.2696
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,12288,1,0,23.6229
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,32,1,0,0.3228
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,64,1,0,0.4202
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,128,1,0,0.6959
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,16384,1,0,32.3210
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,256,1,0,1.0818
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,512,1,0,1.9593
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1024,1,0,3.7131
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1536,1,0,5.4969
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,2048,1,0,7.2950
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,3072,1,0,11.0478
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,32768,1,0,63.6648
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,4096,1,0,14.9043
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,6144,1,0,22.6886
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,8192,1,0,30.6911
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,0,0.2275
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,16,1,0,0.3220
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,32,1,0,0.4203
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,10240,1,0,38.7541
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,64,1,0,0.6225
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,128,1,0,1.0804
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,12288,1,0,47.0956
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,256,1,0,1.9559
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,512,1,0,3.6974
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,16384,1,0,57.3552
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1024,1,0,7.2267
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1536,1,0,10.8151
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,2048,1,0,14.4439
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,3072,1,0,22.0188
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,4096,1,0,29.6382
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,0,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,16,1,0,0.4240
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,6144,1,0,45.1724
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,32768,1,0,127.1432
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,32,1,0,0.6257
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,8192,1,0,53.8359
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,64,1,0,1.0826
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,128,1,0,1.9486
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,10240,1,0,68.2207
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,256,1,0,3.6934
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,512,1,0,7.1982
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1024,1,0,14.3050
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,12288,1,0,83.0989
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1536,1,0,21.4774
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,2048,1,0,28.6186
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,0,0.2468
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,16384,1,0,113.9414
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,16,1,0,0.6270
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,3072,1,0,45.1115
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,32,1,0,1.0769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,64,1,0,1.9425
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,4096,1,0,51.7094
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,128,1,0,3.6894
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,256,1,0,7.2055
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,512,1,0,14.2569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,6144,1,0,79.2755
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1024,1,0,28.3722
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,0,0.2752
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1536,1,0,42.5592
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,16,1,0,1.0839
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,32,1,0,1.9472
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,64,1,0,3.6872
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,8192,1,0,107.5135
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,2048,1,0,49.7504
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,128,1,0,7.1703
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,256,1,0,14.2813
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,3072,1,0,76.3654
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,0,0.3166
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,512,1,0,28.2624
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,16,1,0,1.9505
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,32,1,0,3.6809
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,4096,1,0,103.4421
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,64,1,0,7.1752
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1024,1,0,54.8118
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,128,1,0,14.2073
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,0,0.4212
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,256,1,0,28.2610
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1536,1,0,74.0935
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,16,1,0,3.6756
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,32,1,0,7.1741
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,2048,1,0,99.3100
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,64,1,0,14.2175
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,512,1,0,49.0728
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,0,0.1774
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,128,1,0,28.1795
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,16,1,0,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,32,1,0,0.2274
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,64,1,0,0.2338
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,128,1,0,0.2883
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,256,1,0,49.0901
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,256,1,0,0.3699
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,512,1,0,0.5179
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1024,1,0,0.8666
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1536,1,0,1.2219
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1024,1,0,98.2868
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,2048,1,0,1.5752
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,3072,1,0,2.3028
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,4096,1,0,3.0361
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,6144,1,0,4.5506
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,8192,1,0,6.0978
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,10240,1,0,7.7079
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,12288,1,0,9.3399
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,0,0.1972
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,16384,1,0,12.9518
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,16,1,0,0.2285
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,32,1,0,0.2410
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,64,1,0,0.2841
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,512,1,0,112.3257
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,256,1,0,0.5152
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,128,1,0,0.3710
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,32768,1,0,29.1607
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,512,1,0,0.8605
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1024,1,0,1.5577
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1536,1,0,2.2432
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,2048,1,0,2.9192
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,3072,1,0,4.3844
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,4096,1,0,5.8539
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,6144,1,0,8.9074
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,8192,1,0,12.1333
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,10240,1,0,15.4008
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,0,0.1965
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,12288,1,0,18.7104
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,16,1,0,0.2354
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,32,1,0,0.2874
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,64,1,0,0.3705
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,16384,1,0,25.7847
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,128,1,0,0.5179
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,256,1,0,0.8603
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,512,1,0,1.5480
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1024,1,0,2.8743
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1536,1,0,4.2658
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,2048,1,0,5.6450
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,32768,1,0,57.9606
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,3072,1,0,8.5927
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,4096,1,0,11.6573
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,6144,1,0,18.0740
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,8192,1,0,24.1577
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,0,0.2094
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,16,1,0,0.2866
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,10240,1,0,30.6227
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,32,1,0,0.3710
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,64,1,0,0.5172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,12288,1,0,37.2872
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,128,1,0,0.8558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,256,1,0,1.5486
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,512,1,0,2.8576
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,16384,1,0,51.2823
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1024,1,0,6.2229
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1536,1,0,8.3629
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,2048,1,0,11.1572
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,3072,1,0,17.0888
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,4096,1,0,23.1207
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,0,0.2261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,6144,1,0,35.3807
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,16,1,0,0.3722
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,32768,1,0,108.2598
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,8192,1,0,48.0794
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,32,1,0,0.5204
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,64,1,0,0.8532
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,128,1,0,1.5454
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,10240,1,0,61.0664
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,256,1,0,2.8692
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,512,1,0,5.5579
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,12288,1,0,74.4747
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1024,1,0,11.0385
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1536,1,0,16.5104
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,2048,1,0,22.1200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,0,0.2244
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,16384,1,0,95.0140
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,16,1,0,0.5182
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,3072,1,0,35.3615
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,32,1,0,0.8601
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,4096,1,0,46.0094
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,64,1,0,1.5419
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,128,1,0,2.8547
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,256,1,0,5.5273
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,512,1,0,10.9704
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,6144,1,0,70.6216
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1024,1,0,21.8720
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,0,0.2440
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,16,1,0,0.8573
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1536,1,0,32.8904
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,32,1,0,1.5438
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,8192,1,0,88.6324
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,64,1,0,2.8612
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,2048,1,0,43.9443
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,128,1,0,5.5275
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,256,1,0,11.0204
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,3072,1,0,67.7340
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,0,0.2852
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,512,1,0,21.7614
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,16,1,0,1.5458
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,4096,1,0,84.4474
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,32,1,0,2.8529
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,64,1,0,5.5268
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1024,1,0,49.0837
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,128,1,0,10.9354
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,0,0.3684
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1536,1,0,65.4785
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,256,1,0,21.7703
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,16,1,0,2.8561
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,32,1,0,5.5232
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,2048,1,0,80.3323
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,64,1,0,10.9286
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,512,1,0,43.2113
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,0,0.1815
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,16,1,0,0.2259
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,128,1,0,21.6957
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,32,1,0,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,64,1,0,0.2227
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,128,1,0,0.2592
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,256,1,0,0.3451
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1024,1,0,79.4397
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,512,1,0,0.4697
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1024,1,0,0.7552
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1536,1,0,1.0501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,256,1,0,51.2410
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,2048,1,0,1.3503
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,3072,1,0,2.0033
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,4096,1,0,2.6406
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,6144,1,0,3.9475
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,8192,1,0,5.2958
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,10240,1,0,6.6984
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,512,1,0,79.0566
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,12288,1,0,8.1340
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,16384,1,0,11.3055
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,0,0.1885
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,16,1,0,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,32,1,0,0.2208
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,64,1,0,0.2652
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,128,1,0,0.3685
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,256,1,0,0.4679
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,32768,1,0,25.9337
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,512,1,0,0.7500
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1536,1,0,1.9412
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1024,1,0,1.3389
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,2048,1,0,2.5207
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,3072,1,0,3.7868
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,4096,1,0,5.0629
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,6144,1,0,7.7196
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,8192,1,0,10.5027
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,10240,1,0,13.3557
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,0,0.1896
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,16,1,0,0.2246
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,32,1,0,0.2641
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,12288,1,0,16.2738
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,64,1,0,0.3424
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,16384,1,0,22.5798
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,128,1,0,0.4687
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,256,1,0,0.7490
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,512,1,0,1.3327
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1024,1,0,2.4905
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1536,1,0,3.6542
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,2048,1,0,4.8252
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,3072,1,0,7.3956
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,32768,1,0,51.5282
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,4096,1,0,10.0007
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,6144,1,0,15.3343
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,8192,1,0,21.2367
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,0,0.1947
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,16,1,0,0.2651
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,10240,1,0,26.6712
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,32,1,0,0.3457
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,12288,1,0,32.4745
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,64,1,0,0.4706
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,128,1,0,0.7494
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,256,1,0,1.3279
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,512,1,0,2.4749
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,16384,1,0,44.8726
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1024,1,0,4.7816
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1536,1,0,7.1489
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,2048,1,0,10.2105
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,3072,1,0,14.6528
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,4096,1,0,19.8647
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,0,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,6144,1,0,30.5877
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,16,1,0,0.3450
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,32768,1,0,102.6945
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,32,1,0,0.4685
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,8192,1,0,41.6604
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,64,1,0,0.8961
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,128,1,0,1.3234
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,10240,1,0,52.9970
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,256,1,0,2.4698
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,512,1,0,4.7577
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,12288,1,0,64.8558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1024,1,0,9.3840
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1536,1,0,14.1326
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,2048,1,0,18.8565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,0,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,16384,1,0,89.4533
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,16,1,0,0.5489
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,3072,1,0,29.1353
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,32,1,0,0.7485
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,64,1,0,1.3248
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,4096,1,0,39.5747
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,128,1,0,2.9819
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,256,1,0,4.7726
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,512,1,0,9.3565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,6144,1,0,61.0115
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1024,1,0,18.6370
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1536,1,0,27.9879
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,0,0.2243
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,16,1,0,0.7499
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,8192,1,0,83.0869
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,32,1,0,1.3246
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,2048,1,0,37.5722
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,64,1,0,2.4503
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,128,1,0,4.7178
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,256,1,0,9.3464
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,3072,1,0,58.0623
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,0,0.2592
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,512,1,0,18.5244
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,16,1,0,1.3309
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,32,1,0,2.4502
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,4096,1,0,82.1140
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1024,1,0,37.1110
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,64,1,0,4.7234
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,128,1,0,9.2991
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,0,0.3442
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1536,1,0,55.7735
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,256,1,0,18.5267
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,16,1,0,2.4523
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,32,1,0,4.7117
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,2048,1,0,74.8326
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,512,1,0,36.9051
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,64,1,0,9.3219
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,0,0.1654
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,16,1,0,0.2223
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,128,1,0,18.4691
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,32,1,0,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,64,1,0,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,128,1,0,0.2533
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,256,1,0,36.8867
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,256,1,0,0.3271
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,512,1,0,0.4459
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1024,1,0,0.7085
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1024,1,0,73.8547
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1536,1,0,0.9799
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,2048,1,0,1.2447
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,3072,1,0,1.8407
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,4096,1,0,2.4338
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,6144,1,0,3.6532
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,8192,1,0,4.8941
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,10240,1,0,6.2667
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,512,1,0,73.4237
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,12288,1,0,7.5600
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,0,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,16384,1,0,10.5078
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,16,1,0,0.2164
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,32,1,0,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,64,1,0,0.2558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,128,1,0,0.3242
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,256,1,0,0.4441
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,32768,1,0,24.3168
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,512,1,0,0.7061
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1024,1,0,1.2264
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1536,1,0,1.7727
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,2048,1,0,2.3133
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,3072,1,0,3.6548
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,4096,1,0,4.6707
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,6144,1,0,7.1019
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,8192,1,0,9.7190
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,10240,1,0,12.3542
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,0,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,16,1,0,0.2208
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,32,1,0,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,12288,1,0,15.0522
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,16384,1,0,20.9285
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,64,1,0,0.3271
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,128,1,0,0.4437
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,256,1,0,0.8253
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,512,1,0,1.2282
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1024,1,0,2.2849
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1536,1,0,3.3562
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,2048,1,0,4.4291
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,32768,1,0,48.2783
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,3072,1,0,6.8115
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,4096,1,0,9.2169
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,6144,1,0,14.1426
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,0,0.2020
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,8192,1,0,19.2953
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,10240,1,0,24.6031
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,16,1,0,0.2591
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,32,1,0,0.3577
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,12288,1,0,30.0800
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,64,1,0,0.4462
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,128,1,0,0.7032
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,256,1,0,1.2234
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,16384,1,0,41.6821
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,512,1,0,2.2617
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1024,1,0,4.3912
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1536,1,0,6.5554
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,2048,1,0,8.7398
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,3072,1,0,13.4346
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,4096,1,0,18.2512
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,0,0.2156
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,6144,1,0,28.1292
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,16,1,0,0.3298
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,32768,1,0,96.2548
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,32,1,0,0.4452
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,8192,1,0,38.4707
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,64,1,0,0.6981
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,128,1,0,1.2221
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,10240,1,0,48.9964
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,256,1,0,2.2586
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,512,1,0,4.3601
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,12288,1,0,60.8033
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1024,1,0,8.6129
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1536,1,0,12.8725
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,2048,1,0,17.2917
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,16384,1,0,83.1709
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,0,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,3072,1,0,26.7250
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,16,1,0,0.4461
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,32,1,0,0.7061
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,4096,1,0,36.3528
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,64,1,0,1.2196
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,128,1,0,2.2494
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,256,1,0,4.3209
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,512,1,0,8.5811
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,6144,1,0,56.1169
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1024,1,0,17.0508
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,0,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1536,1,0,25.6296
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,16,1,0,0.7059
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,8192,1,0,76.7254
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,32,1,0,1.2201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,2048,1,0,34.3685
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,64,1,0,2.2450
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,128,1,0,4.3274
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,256,1,0,8.5812
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,3072,1,0,53.3068
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,512,1,0,20.7337
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,0,0.2531
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,16,1,0,1.2194
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,32,1,0,2.2480
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,4096,1,0,72.4680
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1024,1,0,33.8980
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,64,1,0,4.3314
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,128,1,0,8.5358
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1536,1,0,50.9894
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,0,0.3285
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,256,1,0,16.9451
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,16,1,0,2.2494
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,32,1,0,5.5108
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,2048,1,0,68.3945
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,512,1,0,33.6237
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,64,1,0,8.5165
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,128,1,0,16.8628
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,0,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,16,1,0,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,32,1,0,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,64,1,0,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,128,1,0,0.2512
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,256,1,0,33.7934
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,512,1,0,0.4331
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,256,1,0,0.3428
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1024,1,0,67.3775
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1024,1,0,0.6823
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1536,1,0,0.9438
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,2048,1,0,1.2024
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,3072,1,0,1.7612
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,4096,1,0,2.3274
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,6144,1,0,3.4895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,512,1,0,67.1031
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,8192,1,0,4.6793
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,10240,1,0,5.9497
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,12288,1,0,7.2518
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,0,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,16,1,0,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,16384,1,0,10.1205
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,32,1,0,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,64,1,0,0.2485
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,128,1,0,0.3168
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,256,1,0,0.4296
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,512,1,0,0.6833
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,32768,1,0,23.5078
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1024,1,0,1.1851
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1536,1,0,1.6972
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,2048,1,0,2.2094
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,3072,1,0,3.3092
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,4096,1,0,4.4634
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,6144,1,0,6.8086
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,8192,1,0,9.3019
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,10240,1,0,11.8450
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,0,0.1794
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,12288,1,0,14.6292
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,16,1,0,0.2136
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,16384,1,0,20.1292
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,32,1,0,0.2533
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,64,1,0,0.3218
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,128,1,0,0.4320
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,256,1,0,0.6745
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,512,1,0,1.1757
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1024,1,0,2.1728
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1536,1,0,3.1985
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,2048,1,0,4.2397
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,32768,1,0,46.6684
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,3072,1,0,6.4859
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,4096,1,0,8.8124
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,6144,1,0,13.5120
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,0,0.1960
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,8192,1,0,18.8837
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,16,1,0,0.2532
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,10240,1,0,23.6194
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,32,1,0,0.3185
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,64,1,0,0.4312
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,128,1,0,0.6798
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,12288,1,0,28.8218
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,256,1,0,1.1686
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,16384,1,0,40.0613
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,512,1,0,2.1574
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1024,1,0,4.1556
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1536,1,0,6.2378
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,2048,1,0,9.0926
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,3072,1,0,12.8408
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,4096,1,0,17.4443
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,0,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,6144,1,0,26.9249
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,16,1,0,0.3217
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,32768,1,0,92.9861
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,8192,1,0,36.8531
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,32,1,0,0.4325
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,64,1,0,0.6816
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,128,1,0,1.1720
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,10240,1,0,46.9716
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,256,1,0,2.1433
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,512,1,0,4.1488
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,12288,1,0,57.5757
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1024,1,0,8.2219
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1536,1,0,12.3119
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,2048,1,0,16.4683
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,0,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,16384,1,0,79.9074
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,3072,1,0,25.4776
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,16,1,0,0.4334
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,32,1,0,0.6830
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,64,1,0,1.1726
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,4096,1,0,34.6975
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,128,1,0,2.7095
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,256,1,0,4.1124
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,512,1,0,8.1733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,6144,1,0,53.7856
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1024,1,0,16.2333
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,0,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1536,1,0,24.4413
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,16,1,0,0.6808
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,8192,1,0,73.4582
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,2048,1,0,32.6881
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,32,1,0,1.1696
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,64,1,0,2.1402
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,128,1,0,4.1227
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,3072,1,0,50.8434
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,256,1,0,8.1627
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,512,1,0,16.1114
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,0,0.2515
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,4096,1,0,69.2359
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,32,1,0,2.1408
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,16,1,0,1.4635
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1024,1,0,32.2563
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,64,1,0,4.1272
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,128,1,0,8.1202
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1536,1,0,48.5477
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,0,0.3186
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,16,1,0,2.1406
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,256,1,0,16.0841
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,2048,1,0,65.1445
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,32,1,0,4.1152
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,64,1,0,8.1124
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,512,1,0,31.9475
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,128,1,0,16.0249
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,0,0.1746
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,16,1,0,0.2191
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,32,1,0,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,64,1,0,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,256,1,0,32.1720
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,128,1,0,0.2511
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1024,1,0,64.2572
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,256,1,0,0.3149
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,512,1,0,0.4268
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1024,1,0,0.6741
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1536,1,0,0.9205
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,2048,1,0,1.1819
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,3072,1,0,1.7277
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,4096,1,0,2.2732
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,6144,1,0,3.4787
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,8192,1,0,4.5933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,10240,1,0,5.8368
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,512,1,0,63.7985
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,12288,1,0,7.0857
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,0,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,16,1,0,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,32,1,0,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,16384,1,0,9.8969
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,64,1,0,0.2527
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,128,1,0,0.3116
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,32768,1,0,23.1188
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,256,1,0,0.4249
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,512,1,0,0.6677
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1024,1,0,1.1550
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1536,1,0,1.6647
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,2048,1,0,2.3155
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,3072,1,0,3.2534
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,4096,1,0,4.3597
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,6144,1,0,6.6434
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,8192,1,0,9.1012
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,10240,1,0,11.6026
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,0,0.1858
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,16,1,0,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,12288,1,0,14.1644
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,16384,1,0,19.7389
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,32,1,0,0.2522
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,64,1,0,0.3180
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,128,1,0,0.4246
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,256,1,0,0.6681
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,512,1,0,1.1501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1024,1,0,2.1064
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1536,1,0,3.1235
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,2048,1,0,4.1472
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,32768,1,0,45.8784
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,3072,1,0,6.3367
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,4096,1,0,8.6092
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,6144,1,0,13.2349
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,8192,1,0,18.1228
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,0,0.1949
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,16,1,0,0.2506
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,10240,1,0,23.0826
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,32,1,0,0.3182
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,64,1,0,0.4268
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,12288,1,0,28.2433
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,128,1,0,0.6664
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,256,1,0,1.1470
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,16384,1,0,39.2380
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,512,1,0,2.0952
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1024,1,0,4.0783
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1536,1,0,6.0904
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,2048,1,0,8.1450
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,3072,1,0,12.5504
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,4096,1,0,17.0398
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,0,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,6144,1,0,26.3203
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,16,1,0,0.3170
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,32768,1,0,91.7990
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,8192,1,0,36.0240
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,32,1,0,0.4253
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,64,1,0,0.6688
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,10240,1,0,46.0107
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,128,1,0,1.1473
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,256,1,0,2.1059
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,12288,1,0,56.3897
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,512,1,0,4.0200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1024,1,0,8.0004
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1536,1,0,12.0119
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,2048,1,0,16.0594
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,16384,1,0,78.2737
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,0,0.2026
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,3072,1,0,24.8959
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,16,1,0,0.4280
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,32,1,0,0.6705
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,64,1,0,1.1457
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,4096,1,0,35.5341
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,128,1,0,2.0978
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,256,1,0,4.0501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,512,1,0,7.9475
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,6144,1,0,52.5172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1024,1,0,15.8503
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,0,0.2137
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,8192,1,0,71.8578
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1536,1,0,23.7959
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,16,1,0,0.6655
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,2048,1,0,31.9234
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,32,1,0,1.1512
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,64,1,0,2.0922
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,128,1,0,4.0254
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,3072,1,0,49.6521
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,256,1,0,7.9425
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,512,1,0,15.6799
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,0,0.2470
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,4096,1,0,67.6569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,16,1,0,1.1523
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,32,1,0,2.0894
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1024,1,0,31.4579
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,64,1,0,4.0098
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,128,1,0,7.8990
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1536,1,0,53.8098
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,256,1,0,15.7835
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,0,0.3177
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,16,1,0,2.0874
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,2048,1,0,63.6027
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,32,1,0,4.0098
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,512,1,0,31.2609
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,64,1,0,7.9156
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,0,0.1670
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,16,1,0,0.2163
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,128,1,0,15.6628
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,32,1,0,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1024,1,0,62.6293
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,256,1,0,31.2543
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,64,1,0,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,128,1,0,0.2499
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,512,1,0,0.4235
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1024,1,0,0.7433
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,256,1,0,0.3130
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1536,1,0,0.9172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,2048,1,0,1.1695
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,3072,1,0,1.7103
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,4096,1,0,2.2552
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,6144,1,0,3.3775
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,8192,1,0,4.5501
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,10240,1,0,5.7669
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,512,1,0,62.1198
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,12288,1,0,7.0245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,0,0.1772
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,16,1,0,0.2102
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,32,1,0,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,64,1,0,0.2631
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,16384,1,0,9.8229
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,128,1,0,0.3101
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,32768,1,0,22.9541
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,256,1,0,0.4213
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,512,1,0,0.6639
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1024,1,0,1.1505
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1536,1,0,1.6489
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,2048,1,0,2.1359
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,3072,1,0,3.2166
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,4096,1,0,4.2964
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,6144,1,0,6.5721
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,8192,1,0,9.0069
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,0,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,10240,1,0,11.4669
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,16,1,0,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,12288,1,0,14.0146
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,16384,1,0,19.5356
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,32,1,0,0.2447
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,64,1,0,0.3143
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,128,1,0,0.4218
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,256,1,0,0.6625
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,512,1,0,1.1354
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1024,1,0,2.1039
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1536,1,0,3.0908
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,32768,1,0,45.6365
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,2048,1,0,4.0883
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,3072,1,0,6.2634
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,4096,1,0,8.5193
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,6144,1,0,13.0562
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,8192,1,0,17.9299
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,0,0.1900
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,16,1,0,0.2481
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,10240,1,0,22.8345
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,32,1,0,0.3151
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,64,1,0,0.4222
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,128,1,0,0.6619
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,12288,1,0,27.9382
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,256,1,0,1.1405
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,512,1,0,2.0708
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,16384,1,0,38.8811
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1024,1,0,4.0235
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1536,1,0,6.0285
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,2048,1,0,8.0463
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,3072,1,0,12.3860
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,4096,1,0,16.8423
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,6144,1,0,26.0269
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,0,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,16,1,0,0.3146
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,32768,1,0,90.6617
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,8192,1,0,36.4148
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,32,1,0,0.4213
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,64,1,0,0.6647
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,10240,1,0,45.4623
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,128,1,0,1.1377
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,256,1,0,2.0786
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,512,1,0,3.9797
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,12288,1,0,55.8197
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1024,1,0,7.9131
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1536,1,0,11.8687
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,2048,1,0,15.8289
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,0,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,16384,1,0,77.4491
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,3072,1,0,24.6145
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,16,1,0,0.4248
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,32,1,0,0.6651
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,64,1,0,1.1423
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,4096,1,0,33.5575
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,128,1,0,2.0722
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,256,1,0,4.0015
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,512,1,0,9.6288
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,6144,1,0,51.9072
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1024,1,0,15.6139
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,0,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,16,1,0,0.6615
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,8192,1,0,71.0990
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1536,1,0,23.4992
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,32,1,0,1.1382
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,64,1,0,2.6265
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,2048,1,0,34.4626
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,128,1,0,3.9678
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,3072,1,0,49.0008
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,256,1,0,7.8548
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,512,1,0,15.5206
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,0,0.2409
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,4096,1,0,66.8809
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,16,1,0,1.1454
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,32,1,0,2.0712
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1024,1,0,31.0129
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,64,1,0,3.9770
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,128,1,0,7.8083
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1536,1,0,46.7888
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,0,0.3123
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,256,1,0,15.4786
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,16,1,0,2.0701
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,2048,1,0,62.7867
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,32,1,0,3.9624
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,512,1,0,30.8912
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,64,1,0,7.8188
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,128,1,0,19.7635
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,1,1,0,0.2445
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,16,1,0,0.2922
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,32,1,0,0.3036
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,64,1,0,0.3453
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1024,1,0,61.8630
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,256,1,0,30.8541
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,128,1,0,0.3806
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,256,1,0,0.5173
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,512,1,0,0.8123
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,1024,1,0,1.4302
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,1536,1,0,2.0137
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,2048,1,0,2.6527
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,3072,1,0,3.9482
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,512,1,0,61.5170
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,4096,1,0,5.4361
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,6144,1,0,7.8822
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,8192,1,0,10.8884
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,10240,1,0,13.6123
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,12288,1,0,16.4862
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,1,1,0,0.2593
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,16,1,0,0.2995
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,32,1,0,0.3522
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,64,1,0,0.3798
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,16384,1,0,22.4162
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,128,1,0,0.5138
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,256,1,0,0.8039
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,512,1,0,1.4100
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,1024,1,0,2.5845
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,1536,1,0,3.8216
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,1,32768,1,0,40.7536
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,2048,1,0,5.2704
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,3072,1,0,7.5753
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,4096,1,0,10.4817
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,6144,1,0,15.8331
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,1,1,0,0.2704
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,8192,1,0,21.3209
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,16,1,0,0.3523
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,10240,1,0,26.8280
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,12288,1,0,32.4478
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,32,1,0,0.3749
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,16384,1,0,36.8267
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,64,1,0,0.5146
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,128,1,0,0.8021
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,256,1,0,1.3978
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,512,1,0,2.5593
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,1024,1,0,5.1821
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,1536,1,0,7.3895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,2048,1,0,10.2114
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,3072,1,0,15.4231
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,2,32768,1,0,80.1227
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,4096,1,0,20.6440
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,6144,1,0,31.3019
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,1,1,0,0.2821
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,16,1,0,0.3781
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,8192,1,0,34.9163
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,32,1,0,0.5123
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,10240,1,0,44.1374
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,64,1,0,0.8009
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,128,1,0,1.3955
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,256,1,0,2.5509
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,12288,1,0,53.5722
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,512,1,0,5.1556
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,1024,1,0,10.1215
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,16384,1,0,72.9735
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,1536,1,0,15.1436
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,2048,1,0,20.0933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,3072,1,0,30.3854
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,4096,1,0,33.9789
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,1,1,0,0.2991
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,6144,1,0,50.1600
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,16,1,0,0.5078
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,32,1,0,0.7995
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,8192,1,0,69.7901
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,64,1,0,1.3889
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,4,32768,1,0,158.9408
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,128,1,0,2.5400
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,256,1,0,5.1408
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,10240,1,0,86.8296
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,512,1,0,10.0489
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,1024,1,0,19.9541
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,12288,1,0,107.0846
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,1536,1,0,29.7967
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,2048,1,0,32.8080
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,1,1,0,0.3169
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,16,1,0,0.8006
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,32,1,0,1.3925
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,8,16384,1,0,145.9001
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,3072,1,0,50.2274
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,64,1,0,2.5547
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,128,1,0,5.1347
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,4096,1,0,67.9503
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,256,1,0,10.0557
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,512,1,0,19.8943
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,1024,1,0,35.2574
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,6144,1,0,98.6387
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,1,1,0,0.3654
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,1536,1,0,48.9240
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,16,1,0,1.3867
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,32,1,0,2.5456
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,64,1,0,5.1350
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,2048,1,0,65.4659
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,16,8192,1,0,139.7978
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,128,1,0,10.0439
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,256,1,0,19.9270
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,128,1,1,0,0.4551
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,512,1,0,32.4066
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,3072,1,0,100.2917
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,128,16,1,0,2.5526
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,128,32,1,0,5.1288
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,128,64,1,0,10.0218
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,32,4096,1,0,135.6674
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,1024,1,0,64.9810
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,128,128,1,0,19.9046
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,256,1,1,0,0.6813
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,128,256,1,0,32.3471
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,256,16,1,0,5.1381
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,1536,1,0,97.6613
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,256,32,1,0,10.0420
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,256,64,1,0,19.8892
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,128,512,1,0,64.8137
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,1,1,0,0.2062
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,64,2048,1,0,131.0869
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,256,128,1,0,32.3267
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,16,1,0,0.2704
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,32,1,0,0.2745
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,64,1,0,0.3190
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,128,1,0,0.3367
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,256,1,0,0.4435
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,256,256,1,0,64.8621
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,512,1,0,0.6711
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,1024,1,0,1.1949
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,1536,1,0,1.6828
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,2048,1,0,2.1963
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,3072,1,0,3.2314
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,4096,1,0,4.2693
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,128,1024,1,0,129.8907
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,6144,1,0,6.4024
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,8192,1,0,8.5790
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,10240,1,0,10.7834
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,12288,1,0,13.0743
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,1,1,0,0.2196
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,16,1,0,0.2755
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,16384,1,0,17.8576
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,32,1,0,0.3231
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,64,1,0,0.3347
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,128,1,0,0.4400
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,256,1,0,0.6606
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,128,256,512,1,0,129.3374
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,512,1,0,1.1752
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,1,32768,1,0,38.9150
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,1024,1,0,2.1416
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,1536,1,0,3.1193
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,2048,1,0,4.0890
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,3072,1,0,6.1323
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,4096,1,0,8.2179
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,6144,1,0,12.4108
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,8192,1,0,16.7701
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,1,1,0,0.2237
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,16,1,0,0.3210
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,10240,1,0,21.1573
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,12288,1,0,25.6988
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,32,1,0,0.3333
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,128,1,0,0.6581
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,64,1,0,0.4376
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,16384,1,0,35.1957
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,256,1,0,1.1633
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,512,1,0,2.1141
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,1024,1,0,4.0202
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,1536,1,0,5.9598
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,2048,1,0,7.9184
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,2,32768,1,0,69.1088
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,3072,1,0,11.9811
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,4096,1,0,16.1576
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,6144,1,0,24.5531
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,1,1,0,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,16,1,0,0.3330
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,8192,1,0,33.1112
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,32,1,0,0.4390
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,10240,1,0,41.9161
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,64,1,0,0.6528
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,128,1,0,1.1582
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,256,1,0,2.1048
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,12288,1,0,50.7631
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,512,1,0,4.0027
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,1024,1,0,7.8198
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,16384,1,0,61.9623
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,1536,1,0,11.6896
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,2048,1,0,15.6219
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,3072,1,0,23.9325
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,4096,1,0,32.1261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,1,1,0,0.2571
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,6144,1,0,48.9396
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,16,1,0,0.4393
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,32,1,0,0.6521
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,4,32768,1,0,136.8845
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,8192,1,0,58.8625
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,64,1,0,1.1571
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,128,1,0,2.0998
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,10240,1,0,74.4659
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,256,1,0,3.9912
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,512,1,0,7.7606
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,12288,1,0,90.6115
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,1024,1,0,16.8252
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,1536,1,0,23.2909
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,2048,1,0,31.0297
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,1,1,0,0.2546
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,16,1,0,0.6549
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,8,16384,1,0,123.8987
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,3072,1,0,47.4350
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,32,1,0,1.1584
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,64,1,0,2.0987
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,4096,1,0,57.1901
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,128,1,0,3.9696
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,256,1,0,7.8027
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,512,1,0,15.4020
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,1024,1,0,30.7371
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,6144,1,0,86.8215
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,1,1,0,0.2817
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,1536,1,0,46.2300
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,16,1,0,1.1620
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,32,1,0,2.0949
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,64,1,0,3.9705
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,2048,1,0,54.5804
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,16,8192,1,0,117.5518
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,128,1,0,7.7516
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,256,1,0,15.4111
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,3072,1,0,83.9324
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,128,1,1,0,0.3251
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,512,1,0,30.6242
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,128,16,1,0,2.0987
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,128,32,1,0,3.9664
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,128,64,1,0,7.7489
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,1024,1,0,53.9299
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,32,4096,1,0,113.5283
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,128,128,1,0,15.3603
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,256,1,1,0,0.4435
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,256,16,1,0,3.9709
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,1536,1,0,81.1851
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,128,256,1,0,30.6110
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,256,32,1,0,7.7394
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,256,64,1,0,15.3494
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,128,512,1,0,53.7214
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,64,2048,1,0,108.7271
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,1,1,0,0.1848
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,256,128,1,0,30.5397
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,16,1,0,0.2410
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,32,1,0,0.2512
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,64,1,0,0.2871
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,128,1,0,0.2940
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,256,256,1,0,53.8931
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,256,1,0,0.3887
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,512,1,0,0.5452
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,1024,1,0,0.9294
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,1536,1,0,1.3283
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,2048,1,0,1.7054
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,3072,1,0,2.5136
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,4096,1,0,3.3027
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,6144,1,0,4.9606
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,128,1024,1,0,119.6058
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,8192,1,0,6.6335
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,10240,1,0,8.3800
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,12288,1,0,10.1565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,1,1,0,0.2035
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,64,256,512,1,0,107.4312
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,16,1,0,0.2495
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,32,1,0,0.2814
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,64,1,0,0.2973
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,16384,1,0,14.0130
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,128,1,0,0.3826
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,256,1,0,0.6057
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,1,32768,1,0,31.3036
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,512,1,0,0.9120
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,1024,1,0,1.6584
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,1536,1,0,2.4003
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,2048,1,0,3.1146
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,3072,1,0,4.6967
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,4096,1,0,6.2720
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,6144,1,0,9.5323
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,8192,1,0,12.9354
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,1,1,0,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,16,1,0,0.2892
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,10240,1,0,16.4021
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,12288,1,0,19.9465
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,32,1,0,0.3124
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,16384,1,0,27.4442
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,64,1,0,0.3810
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,128,1,0,0.5359
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,256,1,0,0.9010
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,512,1,0,1.6358
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,1024,1,0,3.0541
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,1536,1,0,4.5130
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,2048,1,0,5.9632
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,3072,1,0,9.0831
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,4096,1,0,12.3215
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,2,32768,1,0,61.0943
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,6144,1,0,18.7656
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,8192,1,0,25.5212
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,16,1,0,0.2978
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,1,1,0,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,32,1,0,0.3811
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,10240,1,0,32.3592
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,12288,1,0,39.3645
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,64,1,0,0.5372
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,256,1,0,1.6285
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,128,1,0,0.8964
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,512,1,0,3.0350
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,1024,1,0,5.8794
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,16384,1,0,53.9707
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,1536,1,0,8.8007
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,2048,1,0,11.7846
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,3072,1,0,18.1827
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,4096,1,0,24.5554
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,6144,1,0,37.4911
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,1,1,0,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,16,1,0,0.3839
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,4,32768,1,0,113.4967
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,8192,1,0,50.8599
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,32,1,0,0.5367
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,64,1,0,0.8939
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,128,1,0,1.6179
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,10240,1,0,64.4556
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,256,1,0,3.0302
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,512,1,0,5.8332
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,1024,1,0,11.6391
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,12288,1,0,78.5248
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,1536,1,0,17.5035
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,2048,1,0,23.4009
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,8,16384,1,0,100.4999
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,1,1,0,0.2399
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,16,1,0,0.5373
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,3072,1,0,36.0740
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,32,1,0,0.9094
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,4096,1,0,48.7840
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,64,1,0,1.6181
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,128,1,0,3.0148
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,256,1,0,5.8518
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,512,1,0,13.3093
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,1024,1,0,23.0990
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,6144,1,0,74.8894
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,1,1,0,0.2604
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,1536,1,0,34.6918
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,16,1,0,0.8992
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,16,8192,1,0,94.2659
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,32,1,0,1.6168
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,64,1,0,3.0039
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,2048,1,0,46.5733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,128,1,0,5.7990
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,256,1,0,11.5730
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,3072,1,0,74.8404
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,128,1,1,0,0.3006
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,512,1,0,22.9577
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,128,16,1,0,1.6187
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,32,4096,1,0,90.0383
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,128,32,1,0,3.0048
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,128,64,1,0,5.8099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,1024,1,0,45.8989
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,128,128,1,0,11.5271
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,256,1,1,0,0.4093
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,1536,1,0,69.2320
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,256,16,1,0,3.0070
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,128,256,1,0,22.9937
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,256,32,1,0,5.8133
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,64,2048,1,0,85.5027
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,256,64,1,0,11.5118
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,128,512,1,0,45.6994
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,1,1,0,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,16,1,0,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,256,128,1,0,22.8698
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,32,1,0,0.2334
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,64,1,0,0.2700
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,128,1,0,0.2783
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,256,256,1,0,45.5973
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,128,1024,1,0,84.4924
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,256,1,0,0.3579
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,1024,1,0,0.7947
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,1536,1,0,1.1224
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,512,1,0,0.4926
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,2048,1,0,1.4652
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,3072,1,0,2.1573
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,4096,1,0,2.8316
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,6144,1,0,4.2441
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,8192,1,0,5.6922
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,10240,1,0,7.1838
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,32,256,512,1,0,84.1038
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,12288,1,0,8.7390
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,16384,1,0,12.1428
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,1,1,0,0.1897
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,16,1,0,0.2393
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,32,1,0,0.2745
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,64,1,0,0.2698
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,128,1,0,0.3572
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,256,1,0,0.4841
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,512,1,0,0.7842
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,1,32768,1,0,27.5592
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,1024,1,0,1.4115
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,1536,1,0,2.1877
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,2048,1,0,2.6533
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,3072,1,0,3.9709
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,4096,1,0,5.3311
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,6144,1,0,8.0941
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,8192,1,0,11.0592
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,1,1,0,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,16,1,0,0.2734
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,10240,1,0,14.0201
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,12288,1,0,17.0534
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,32,1,0,0.2743
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,16384,1,0,23.6090
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,64,1,0,0.3553
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,128,1,0,0.4800
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,256,1,0,0.7758
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,512,1,0,1.3841
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,1024,1,0,2.5853
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,1536,1,0,3.8082
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,2048,1,0,5.0447
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,2,32768,1,0,53.5605
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,3072,1,0,7.6926
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,4096,1,0,10.3769
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,6144,1,0,15.9202
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,1,1,0,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,8192,1,0,21.7258
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,16,1,0,0.2690
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,10240,1,0,27.6187
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,12288,1,0,33.6354
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,32,1,0,0.3552
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,64,1,0,0.4809
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,128,1,0,0.7694
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,256,1,0,1.3634
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,512,1,0,2.5544
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,16384,1,0,46.4318
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,1024,1,0,5.1766
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,1536,1,0,7.3929
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,2048,1,0,9.8696
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,3072,1,0,15.2565
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,4096,1,0,20.7218
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,6144,1,0,31.7791
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,1,1,0,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,16,1,0,0.3514
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,4,32768,1,0,105.6814
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,8192,1,0,43.2615
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,64,1,0,0.7652
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,32,1,0,0.4801
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,128,1,0,1.3639
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,10240,1,0,55.0336
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,256,1,0,2.5446
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,512,1,0,4.9143
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,12288,1,0,67.2973
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,1024,1,0,9.6726
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,1536,1,0,14.6351
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,2048,1,0,19.5704
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,1,1,0,0.2209
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,8,16384,1,0,92.6999
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,3072,1,0,30.3730
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,16,1,0,0.4822
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,32,1,0,0.7691
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,4096,1,0,41.3297
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,64,1,0,1.3593
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,128,1,0,2.5386
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,256,1,0,4.9037
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,512,1,0,9.6409
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,6144,1,0,63.4994
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,1024,1,0,19.2596
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,1,1,0,0.2466
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,16,1,0,0.7724
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,1536,1,0,28.9955
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,16,8192,1,0,86.5326
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,32,1,0,1.3646
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,64,1,0,2.5282
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,2048,1,0,39.0752
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,128,1,0,4.8681
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,256,1,0,9.5963
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,3072,1,0,60.5086
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,512,1,0,19.1503
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,128,1,1,0,0.2822
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,128,16,1,0,1.3628
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,32,4096,1,0,82.3138
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,128,32,1,0,2.5329
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,1024,1,0,38.3935
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,128,64,1,0,4.8697
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,128,128,1,0,9.6087
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,256,1,1,0,0.3879
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,1536,1,0,57.8809
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,256,16,1,0,2.5300
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,128,256,1,0,23.3534
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,256,32,1,0,4.8709
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,64,2048,1,0,77.5526
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,128,512,1,0,38.2315
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,256,64,1,0,9.6069
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,1,1,0,0.1688
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,256,128,1,0,19.0683
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,16,1,0,0.2339
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,32,1,0,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,64,1,0,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,128,1,0,0.2589
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,256,256,1,0,38.2246
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,256,1,0,0.3413
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,128,1024,1,0,76.4915
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,512,1,0,0.4638
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,1024,1,0,0.7426
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,1536,1,0,1.0261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,2048,1,0,1.3311
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,3072,1,0,1.9637
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,4096,1,0,2.5934
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,6144,1,0,3.8978
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,8192,1,0,5.2251
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,16,256,512,1,0,75.8622
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,10240,1,0,6.6245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,12288,1,0,8.0467
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,1,1,0,0.1887
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,16384,1,0,11.1606
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,16,1,0,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,32,1,0,0.2690
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,64,1,0,0.2630
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,128,1,0,0.3349
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,1,32768,1,0,25.6241
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,512,1,0,0.7265
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,1024,1,0,1.2812
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,256,1,0,0.4589
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,1536,1,0,1.8503
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,2048,1,0,2.4112
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,3072,1,0,3.6814
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,4096,1,0,4.8458
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,6144,1,0,7.3819
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,8192,1,0,10.0973
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,10240,1,0,12.8290
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,12288,1,0,15.6444
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,1,1,0,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,16,1,0,0.2616
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,32,1,0,0.2618
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,16384,1,0,21.7172
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,64,1,0,0.3353
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,128,1,0,0.4534
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,256,1,0,0.7199
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,512,1,0,1.2609
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,1024,1,0,2.3261
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,1536,1,0,3.4542
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,2048,1,0,4.5740
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,2,32768,1,0,49.7826
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,3072,1,0,6.9879
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,4096,1,0,9.4463
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,6144,1,0,14.5121
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,8192,1,0,19.8141
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,1,1,0,0.2020
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,10240,1,0,25.2178
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,16,1,0,0.2704
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,32,1,0,0.3381
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,12288,1,0,31.1173
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,64,1,0,0.4532
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,128,1,0,0.7174
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,256,1,0,1.2408
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,512,1,0,2.3094
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,16384,1,0,42.7099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,1024,1,0,4.4753
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,1536,1,0,6.7033
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,2048,1,0,8.9095
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,3072,1,0,13.8662
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,4096,1,0,18.7820
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,1,1,0,0.2192
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,6144,1,0,28.9669
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,16,1,0,0.3326
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,4,32768,1,0,98.1597
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,8192,1,0,39.5264
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,32,1,0,0.4546
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,64,1,0,0.7129
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,10240,1,0,50.3568
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,128,1,0,1.2324
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,256,1,0,2.2971
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,512,1,0,4.4308
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,12288,1,0,61.5960
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,1024,1,0,8.7531
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,1536,1,0,13.2099
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,2048,1,0,17.6448
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,8,16384,1,0,85.1023
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,1,1,0,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,3072,1,0,28.9803
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,16,1,0,0.4558
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,32,1,0,0.7118
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,4096,1,0,37.5576
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,64,1,0,1.2423
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,128,1,0,2.2907
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,256,1,0,4.4383
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,512,1,0,8.7038
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,6144,1,0,57.8637
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,1024,1,0,17.3138
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,1,1,0,0.2353
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,1536,1,0,26.2363
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,16,1,0,0.7184
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,16,8192,1,0,78.8304
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,32,1,0,1.2360
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,2048,1,0,35.1642
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,64,1,0,2.2861
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,128,1,0,4.4010
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,3072,1,0,54.8193
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,256,1,0,8.7159
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,512,1,0,17.2533
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,128,1,1,0,0.2724
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,32,4096,1,0,74.7832
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,128,16,1,0,1.2410
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,128,32,1,0,2.2855
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,1024,1,0,34.5353
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,128,64,1,0,4.4058
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,128,128,1,0,8.6660
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,1536,1,0,52.0351
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,256,1,1,0,0.3705
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,256,16,1,0,2.2839
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,128,256,1,0,17.2856
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,256,32,1,0,4.3961
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,64,2048,1,0,70.0423
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,128,512,1,0,34.3899
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,256,64,1,0,8.6659
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,1,1,0,0.1689
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,256,128,1,0,17.1435
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,16,1,0,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,32,1,0,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,64,1,0,0.2688
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,256,256,1,0,34.3957
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,128,1024,1,0,68.8851
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,128,1,0,0.2613
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,256,1,0,0.3281
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,512,1,0,0.4494
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,1024,1,0,0.7165
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,1536,1,0,0.9835
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,2048,1,0,1.2548
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,3072,1,0,1.8753
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,4096,1,0,2.4720
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,6144,1,0,3.6991
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,8192,1,0,4.9733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,10240,1,0,6.3288
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,8,256,512,1,0,68.6308
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,12288,1,0,7.6886
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,1,1,0,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,16,1,0,0.2341
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,16384,1,0,10.6933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,64,1,0,0.2595
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,32,1,0,0.2593
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,128,1,0,0.3303
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,256,1,0,0.4443
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,1,32768,1,0,24.7078
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,512,1,0,0.6968
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,1024,1,0,1.2138
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,1536,1,0,1.7543
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,2048,1,0,2.2872
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,3072,1,0,3.4310
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,4096,1,0,4.7660
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,6144,1,0,7.0485
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,8192,1,0,9.6363
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,1,1,0,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,10240,1,0,12.2550
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,16,1,0,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,12288,1,0,14.9256
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,32,1,0,0.2592
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,16384,1,0,20.7491
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,64,1,0,0.3278
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,256,1,0,0.6926
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,128,1,0,0.4393
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,512,1,0,1.1948
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,1024,1,0,2.2051
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,1536,1,0,3.2644
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,2,32768,1,0,47.9083
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,2048,1,0,4.3407
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,3072,1,0,6.6376
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,4096,1,0,8.9869
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,6144,1,0,13.7985
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,8192,1,0,18.8636
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,1,1,0,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,10240,1,0,24.0318
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,16,1,0,0.2575
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,12288,1,0,29.3395
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,32,1,0,0.3277
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,64,1,0,0.4378
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,128,1,0,0.6894
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,256,1,0,1.1800
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,512,1,0,2.1932
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,16384,1,0,40.7627
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,1024,1,0,4.2353
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,1536,1,0,6.3491
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,2048,1,0,8.4437
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,3072,1,0,13.1744
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,4096,1,0,17.8414
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,1,1,0,0.2187
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,6144,1,0,27.5348
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,4,32768,1,0,94.3777
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,16,1,0,0.3252
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,8192,1,0,37.5933
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,32,1,0,0.4407
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,64,1,0,0.6894
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,128,1,0,1.1829
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,10240,1,0,47.8982
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,256,1,0,2.1587
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,512,1,0,4.1956
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,12288,1,0,58.7418
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,1024,1,0,8.2927
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,1536,1,0,12.5296
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,2048,1,0,16.7013
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,8,16384,1,0,81.4260
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,1,1,0,0.2185
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,3072,1,0,26.0256
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,16,1,0,0.4417
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,4096,1,0,35.5200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,32,1,0,0.6908
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,64,1,0,1.1822
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,128,1,0,2.1668
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,256,1,0,4.2013
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,512,1,0,8.2483
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,6144,1,0,54.9764
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,1024,1,0,16.4138
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,1536,1,0,24.7219
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,1,1,0,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,16,8192,1,0,75.0567
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,16,1,0,0.6918
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,32,1,0,1.1824
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,2048,1,0,33.2981
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,64,1,0,2.1580
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,128,1,0,4.1655
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,3072,1,0,51.9412
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,256,1,0,8.2650
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,1,1,0,0.2695
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,512,1,0,16.3066
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,32,4096,1,0,70.9264
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,16,1,0,1.1814
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,32,1,0,2.1645
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,1024,1,0,32.6406
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,64,1,0,4.1569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,128,1,0,8.1989
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,1536,1,0,49.2895
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,256,1,1,0,0.3663
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,256,1,0,16.3411
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,256,16,1,0,2.1686
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,64,2048,1,0,66.1664
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,256,32,1,0,4.1564
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,512,1,0,32.4014
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,256,64,1,0,8.2046
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,256,128,1,0,16.2548
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,1,1,0,0.1640
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,16,1,0,0.2277
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,32,1,0,0.2306
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,64,1,0,0.2648
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,128,1024,1,0,65.1742
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,256,256,1,0,32.4954
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,128,1,0,0.2617
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,256,1,0,0.3243
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,512,1,0,0.4435
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,1024,1,0,0.6991
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,1536,1,0,0.9677
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,2048,1,0,1.2302
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,3072,1,0,1.8146
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,4096,1,0,2.4088
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,4,256,512,1,0,64.6831
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,6144,1,0,3.6052
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,8192,1,0,4.8870
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,10240,1,0,6.1534
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,12288,1,0,7.5123
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,1,1,0,0.1807
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,16384,1,0,10.4419
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,16,1,0,0.2281
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,32,1,0,0.2659
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,64,1,0,0.2591
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,128,1,0,0.3200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,256,1,0,0.4376
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,1,32768,1,0,24.2159
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,512,1,0,0.6829
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,1024,1,0,1.1910
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,1536,1,0,1.7024
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,2048,1,0,2.2311
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,3072,1,0,3.3621
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,4096,1,0,4.4972
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,6144,1,0,6.8585
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,8192,1,0,9.4003
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,10240,1,0,11.9365
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,1,1,0,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,12288,1,0,14.5763
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,16,1,0,0.2550
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,32,1,0,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,16384,1,0,20.2913
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,64,1,0,0.3246
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,128,1,0,0.4329
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,256,1,0,0.6789
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,512,1,0,1.1664
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,1024,1,0,2.1514
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,1536,1,0,3.1866
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,2,32768,1,0,47.0013
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,2048,1,0,4.2165
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,3072,1,0,6.4484
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,4096,1,0,8.7316
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,6144,1,0,13.4293
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,8192,1,0,18.4008
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,1,1,0,0.1985
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,16,1,0,0.2532
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,10240,1,0,23.4549
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,32,1,0,0.3234
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,12288,1,0,28.7069
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,128,1,0,0.6752
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,256,1,0,1.1543
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,64,1,0,0.4316
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,16384,1,0,39.8247
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,512,1,0,2.1357
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,1024,1,0,4.1093
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,1536,1,0,6.1476
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,2048,1,0,8.2225
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,3072,1,0,12.7761
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,4096,1,0,17.3875
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,1,1,0,0.2104
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,6144,1,0,26.8263
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,4,32768,1,0,92.5885
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,16,1,0,0.3197
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,8192,1,0,36.6631
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,32,1,0,0.4322
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,64,1,0,0.6776
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,128,1,0,1.1540
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,10240,1,0,46.7397
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,256,1,0,2.3823
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,512,1,0,4.5240
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,12288,1,0,57.3623
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,1024,1,0,8.0214
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,1536,1,0,12.1753
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,2048,1,0,16.2557
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,8,16384,1,0,79.4905
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,1,1,0,0.2103
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,3072,1,0,25.8927
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,16,1,0,0.4363
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,4096,1,0,34.6060
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,64,1,0,1.1502
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,32,1,0,0.6741
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,128,1,0,2.1030
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,256,1,0,4.0801
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,6144,1,0,53.5898
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,512,1,0,7.9964
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,1024,1,0,15.9104
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,16,8192,1,0,73.1741
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,16,1,0,0.6793
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,1536,1,0,24.0610
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,1,1,0,0.2350
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,2048,1,0,32.3779
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,32,1,0,1.1506
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,64,1,0,2.1069
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,128,1,0,4.0342
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,3072,1,0,50.4743
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,256,1,0,8.0302
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,512,1,0,15.8131
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,1,1,0,0.2673
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,16,1,0,1.1585
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,32,4096,1,0,69.0340
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,32,1,0,2.1052
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,1024,1,0,31.7128
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,64,1,0,4.0421
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,128,1,0,7.9486
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,1536,1,0,47.7615
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,256,16,1,0,2.1103
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,256,1,0,15.8520
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,256,1,1,0,0.3595
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,256,32,1,0,4.0386
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,64,2048,1,0,64.3958
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,256,64,1,0,7.9437
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,512,1,0,31.5221
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,1,1,0,0.1671
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,16,1,0,0.2275
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,256,128,1,0,15.7431
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,32,1,0,0.2264
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,64,1,0,0.2670
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,128,1,0,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,256,256,1,0,31.5774
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,128,1024,1,0,63.2455
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,256,1,0,0.3262
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,512,1,0,0.4384
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,1024,1,0,0.6918
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,1536,1,0,0.9559
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,2048,1,0,1.2195
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,3072,1,0,1.7778
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,4096,1,0,2.3635
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,2,256,512,1,0,62.8033
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,6144,1,0,3.5802
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,8192,1,0,4.7984
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,10240,1,0,6.0863
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,12288,1,0,7.4406
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,16384,1,0,10.3473
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,1,1,0,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,16,1,0,0.2264
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,64,1,0,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,128,1,0,0.3245
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,32,1,0,0.2607
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,256,1,0,0.4337
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,512,1,0,0.6805
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,1,32768,1,0,23.9805
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,1024,1,0,1.1783
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,1536,1,0,1.6807
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,2048,1,0,2.1826
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,3072,1,0,3.3182
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,4096,1,0,4.4525
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,6144,1,0,6.7932
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,8192,1,0,9.2740
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,10240,1,0,11.8049
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,1,1,0,0.1830
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,12288,1,0,14.4189
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,16,1,0,0.2651
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,16384,1,0,20.0401
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,32,1,0,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,64,1,0,0.3200
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,128,1,0,0.4314
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,256,1,0,0.6729
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,512,1,0,1.1602
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,1024,1,0,2.1311
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,2048,1,0,4.1377
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,1536,1,0,3.4794
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,3072,1,0,6.3612
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,2,32768,1,0,46.5286
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,4096,1,0,8.6293
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,6144,1,0,13.2733
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,8192,1,0,18.1783
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,1,1,0,0.1963
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,16,1,0,0.2548
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,10240,1,0,23.1757
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,32,1,0,0.3203
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,12288,1,0,28.3147
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,64,1,0,0.4300
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,128,1,0,0.6704
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,16384,1,0,39.3267
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,256,1,0,1.1535
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,512,1,0,2.1039
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,1024,1,0,4.0687
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,1536,1,0,6.0417
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,2048,1,0,8.0931
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,3072,1,0,12.6256
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,4096,1,0,17.1497
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,6144,1,0,26.4756
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,4,32768,1,0,91.6785
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,1,1,0,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,8192,1,0,36.1921
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,16,1,0,0.3231
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,32,1,0,0.4297
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,10240,1,0,46.1751
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,64,1,0,0.6703
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,128,1,0,1.1444
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,256,1,0,2.0907
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,512,1,0,4.0154
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,12288,1,0,56.6131
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,1024,1,0,7.9509
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,1536,1,0,11.9892
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,2048,1,0,16.0365
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,8,16384,1,0,78.5257
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,1,1,0,0.2150
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,3072,1,0,24.9947
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,16,1,0,0.4309
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,32,1,0,0.6703
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,4096,1,0,34.1725
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,64,1,0,1.1502
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,128,1,0,2.0779
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,256,1,0,4.0080
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,512,1,0,7.8976
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,6144,1,0,52.8744
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,1024,1,0,15.7260
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,1,1,0,0.2325
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,16,8192,1,0,72.1728
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,1536,1,0,23.7224
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,16,1,0,0.6728
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,32,1,0,1.1484
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,2048,1,0,31.8968
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,64,1,0,2.0739
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,128,1,0,3.9708
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,3072,1,0,49.7965
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,256,1,0,7.8898
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,512,1,0,15.6050
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,128,1,1,0,0.2608
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,32,4096,1,0,68.0447
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,128,16,1,0,1.1477
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,128,32,1,0,2.0803
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,1024,1,0,31.3369
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,128,64,1,0,3.9914
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,128,128,1,0,7.8187
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,256,1,1,0,0.3581
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,1536,1,0,47.1608
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,256,16,1,0,2.0839
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,128,256,1,0,15.6480
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,64,2048,1,0,63.4991
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,128,512,1,0,30.9954
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,256,32,1,0,3.9776
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,256,64,1,0,7.8348
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,256,128,1,0,15.5150
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,128,1024,1,0,62.3085
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,256,256,1,0,31.0804
VLLM,0.16.0,NVIDIA B200,dsa_context_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,fp8,fp8,nvfp4,1,256,512,1,0,61.8501
