framework,version,device,op_name,kernel_source,model,architecture,mla_dtype,kv_cache_dtype,gemm_type,num_heads,batch_size,isl,tp_size,step,latency
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,2,0.2149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,512,0.2144
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,1024,0.2152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,4,0.2147
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,8,0.2247
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,16,0.2226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,128,0.2246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,256,0.2264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,2048,0.2424
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,64,0.2261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,32,0.2246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,4096,0.3096
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,8192,0.2397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,16384,0.2500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,32768,0.2628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,2,0.2343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,4,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,65536,0.2916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1,1,1,131072,0.3503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,8,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,16,0.2317
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,32,0.2298
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,64,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,128,0.2326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,512,0.2327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,256,0.2313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,1024,0.2541
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,2048,0.2553
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,4096,0.3548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,8192,0.2731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,16384,0.2867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,32768,0.3009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,65536,0.3273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,2,1,1,131072,0.3693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,2,0.2587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,4,0.2547
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,8,0.2605
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,16,0.2595
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,32,0.2609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,64,0.2616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,256,0.2617
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,512,0.2583
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,128,0.2655
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,1024,0.2582
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,2048,0.2655
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,4096,0.3608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,8192,0.2776
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,16384,0.2934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,32768,0.3044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,65536,0.3335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,4,1,1,131072,0.3748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,2,0.2689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,8,0.2678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,4,0.2711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,16,0.2694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,32,0.2751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,64,0.2722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,128,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,256,0.2675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,512,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,1024,0.2739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,2048,0.2747
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,4096,0.3764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,8192,0.2914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,16384,0.3053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,32768,0.3152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,65536,0.3457
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,8,1,1,131072,0.4061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,2,0.2821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,4,0.2798
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,8,0.2821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,16,0.2852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,32,0.2823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,64,0.2782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,128,0.2845
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,256,0.2808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,512,0.2801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,1024,0.2856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,2048,0.2911
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,4096,0.3923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,8192,0.3014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,16384,0.3172
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,32768,0.3372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,65536,0.3718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,16,1,1,131072,0.4450
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,2,0.2966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,4,0.2977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,8,0.3013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,16,0.2967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,32,0.2941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,64,0.2984
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,128,0.2954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,256,0.2981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,512,0.3003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,1024,0.3002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,2048,0.3108
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,4096,0.4066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,8192,0.3201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,16384,0.3393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,32768,0.3634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,65536,0.4132
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,32,1,1,131072,0.5006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,2,0.3259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,4,0.3262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,8,0.3220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,32,0.3274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,16,0.3281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,64,0.3261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,128,0.3247
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,256,0.3265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,512,0.3286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,1024,0.3323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,2048,0.3373
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,4096,0.4407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,8192,0.3588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,16384,0.3844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,65536,0.4846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,32768,0.4124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,64,1,1,131072,0.6113
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,2,0.3983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,4,0.3973
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,8,0.4012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,16,0.3998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,32,0.4006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,128,0.3974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,64,0.3971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,512,0.4057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,256,0.4004
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,1024,0.4061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,2048,0.4236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,4096,0.5323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,8192,0.4523
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,16384,0.4848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,32768,0.5422
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,65536,0.6481
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,128,1,1,131072,0.8619
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,2,0.5450
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,4,0.5425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,8,0.5457
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,16,0.5448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,32,0.5438
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,64,0.5470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,128,0.5461
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,256,0.5580
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,512,0.5626
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,1024,0.5777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,2048,0.6050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,4096,0.7930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,16384,0.7181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,8192,0.6446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,65536,1.0027
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,32768,0.8156
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,256,1,1,131072,1.4274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,2,0.8507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,4,0.8491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,8,0.8467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,16,0.8476
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,32,0.8478
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,64,0.8487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,128,0.8564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,256,0.8692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,512,0.8839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,1024,0.9070
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,2048,0.9665
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,4096,1.3159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,8192,1.0463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,16384,1.1901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,32768,1.3898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,512,1,1,65536,1.7726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,2,1.4932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,4,1.4896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,8,1.4927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,16,1.4904
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,32,1.5067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,64,1.4967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,128,1.5156
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,256,1.5343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,512,1.5639
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,2048,1.7305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,1024,1.6095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,4096,2.3340
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,8192,1.8849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,16384,2.1709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,2,0.2198
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,4,0.2164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,8,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,128,1024,1,1,32768,2.5468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,16,0.2170
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,32,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,64,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,128,0.2284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,256,0.2173
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,1024,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,512,0.2426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,2048,0.2192
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,4096,0.3425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,8192,0.2275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,16384,0.2544
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,65536,0.2947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,131072,0.3225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1,1,1,32768,0.2653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,4,0.2349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,2,0.2374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,8,0.2269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,16,0.2337
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,32,0.2311
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,64,0.2536
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,128,0.2255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,256,0.2312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,512,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,1024,0.2262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,2048,0.2399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,4096,0.3260
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,8192,0.2520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,16384,0.2651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,32768,0.2634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,65536,0.2995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,2,1,1,131072,0.3425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,2,0.2357
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,4,0.2366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,8,0.2334
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,16,0.2396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,32,0.2394
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,64,0.2336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,128,0.2383
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,1024,0.2380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,2048,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,4096,0.3327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,256,0.2397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,8192,0.2532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,512,0.2395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,16384,0.2649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,32768,0.2814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,65536,0.3115
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,4,1,1,131072,0.3564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,4,0.2435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,2,0.2442
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,8,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,16,0.2438
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,32,0.2407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,64,0.2485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,256,0.2463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,128,0.2453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,512,0.2465
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,1024,0.2444
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,2048,0.2477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,4096,0.3477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,8192,0.2568
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,16384,0.2797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,32768,0.2917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,131072,0.3800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,2,0.2511
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,4,0.2548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,8,0.2497
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,8,1,1,65536,0.3231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,16,0.2489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,32,0.2549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,64,0.2535
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,128,0.2519
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,256,0.2514
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,512,0.2484
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,1024,0.2506
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,2048,0.2613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,4096,0.3530
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,8192,0.2770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,16384,0.2895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,32768,0.3077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,65536,0.3485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,16,1,1,131072,0.4090
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,4,0.2630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,2,0.2624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,8,0.2601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,16,0.2663
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,32,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,128,0.2628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,64,0.2642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,1024,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,512,0.2628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,2048,0.2713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,4096,0.3693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,256,0.2623
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,8192,0.2908
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,32768,0.3311
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,16384,0.3082
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,131072,0.4633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,4,0.2834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,8,0.2869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,2,0.2900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,32,1,1,65536,0.3799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,16,0.2844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,64,0.2797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,32,0.2834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,256,0.2839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,512,0.2942
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,128,0.2832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,1024,0.2876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,2048,0.2978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,4096,0.4007
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,8192,0.3146
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,16384,0.3426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,32768,0.3735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,65536,0.4453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,2,0.3436
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,8,0.3425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,4,0.3431
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,64,1,1,131072,0.5693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,16,0.3399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,32,0.3426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,64,0.3430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,128,0.3423
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,512,0.3506
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,256,0.3456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,1024,0.3566
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,2048,0.3639
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,4096,0.4759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,8192,0.3949
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,16384,0.4315
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,32768,0.4880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,65536,0.5978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,2,0.4565
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,4,0.4917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,128,1,1,131072,0.8058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,8,0.4542
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,16,0.4618
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,32,0.4551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,64,0.4559
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,256,0.4661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,512,0.4735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,128,0.4612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,1024,0.4836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,2048,0.5077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,4096,0.7011
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,8192,0.5538
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,16384,0.6273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,32768,0.7221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,65536,0.9147
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,2,0.6835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,256,1,1,131072,1.3436
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,4,0.6825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,8,0.6856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,16,0.6869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,32,0.6917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,64,0.6991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,128,0.7036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,256,0.7087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,512,0.7252
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,2048,0.8029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,1024,0.7472
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,4096,1.1527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,8192,0.8829
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,16384,1.0292
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,32768,1.2178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,2,1.3265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,4,1.1926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,8,1.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,512,1,1,65536,1.5971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,16,1.1981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,32,1.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,128,1.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,64,1.2113
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,256,1.2333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,512,1.2522
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,1024,1.3013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,2048,1.4151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,4096,2.0233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,8192,1.5703
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,2,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,4,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,16384,1.8470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,8,0.2010
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,16,0.2193
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,64,1024,1,1,32768,2.2544
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,32,0.2068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,64,0.1994
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,128,0.2061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,256,0.1900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,512,0.2055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,1024,0.2268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,2048,0.2047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,4096,0.3128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,8192,0.2172
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,16384,0.2317
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,32768,0.2425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,65536,0.2600
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1,1,1,131072,0.3089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,2,0.2367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,4,0.2128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,8,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,16,0.2155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,32,0.2102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,64,0.2155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,128,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,256,0.2329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,512,0.2153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,1024,0.2177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,4096,0.3081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,2048,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,8192,0.2299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,32768,0.2481
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,16384,0.2415
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,65536,0.2830
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,2,1,1,131072,0.3205
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,2,0.2207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,4,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,16,0.2194
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,8,0.2153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,32,0.2209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,128,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,64,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,256,0.2208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,512,0.2199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,1024,0.2197
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,2048,0.2244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,4096,0.3104
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,8192,0.2356
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,16384,0.2519
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,32768,0.2580
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,65536,0.2897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,4,1,1,131072,0.3371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,2,0.2298
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,16,0.2270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,8,0.2237
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,4,0.2257
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,32,0.2336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,64,0.2243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,128,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,256,0.2298
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,512,0.2287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,1024,0.2312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,2048,0.2300
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,4096,0.3313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,8192,0.2470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,16384,0.2631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,32768,0.2726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,65536,0.3071
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,8,1,1,131072,0.3621
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,2,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,4,0.2320
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,8,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,32,0.2387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,16,0.2339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,64,0.2310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,128,0.2369
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,256,0.2374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,512,0.2364
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,1024,0.2354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,2048,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,4096,0.3430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,8192,0.2571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,16384,0.2697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,65536,0.3266
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,32768,0.2900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,16,1,1,131072,0.3964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,2,0.2391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,8,0.2409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,4,0.2407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,16,0.2399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,32,0.2467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,64,0.2446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,128,0.2451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,256,0.2454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,1024,0.2500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,2048,0.2526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,512,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,4096,0.3512
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,8192,0.2693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,16384,0.2873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,32768,0.3103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,65536,0.3588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,32,1,1,131072,0.4427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,2,0.2578
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,4,0.2599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,8,0.2537
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,16,0.2614
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,32,0.2585
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,64,0.2592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,128,0.2605
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,512,0.2601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,1024,0.2650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,256,0.2613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,2048,0.2708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,4096,0.3722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,16384,0.3167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,8192,0.2919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,32768,0.3505
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,65536,0.4151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,64,1,1,131072,0.5458
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,2,0.3042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,4,0.3037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,8,0.3084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,16,0.3054
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,32,0.3095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,64,0.3083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,128,0.3068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,256,0.3105
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,512,0.3197
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,1024,0.3195
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,2048,0.3327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,4096,0.4408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,8192,0.3597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,16384,0.3967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,32768,0.4496
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,65536,0.5606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,2,0.3989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,128,1,1,131072,0.7691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,4,0.4022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,8,0.4010
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,16,0.4034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,32,0.4026
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,64,0.3999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,128,0.4042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,256,0.4063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,512,0.4282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,1024,0.4258
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,2048,0.4518
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,4096,0.6423
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,16384,0.5662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,8192,0.4967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,32768,0.6613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,65536,0.8557
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,256,1,1,131072,1.2819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,2,0.5729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,4,0.5712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,8,0.5713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,16,0.5697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,32,0.5720
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,64,0.5754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,128,0.5754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,256,0.6269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,512,0.6015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,1024,0.6238
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,2048,0.6764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,4096,1.0320
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,8192,0.7561
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,16384,0.8974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,32768,1.1012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,2,0.9465
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,512,1,1,65536,1.4848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,4,0.9505
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,8,0.9530
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,16,0.9600
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,32,1.1664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,64,0.9706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,128,0.9701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,256,0.9928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,512,1.0648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,1024,1.0701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,2048,1.1764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,4096,1.7888
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,16384,1.6010
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,8192,1.3218
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,2,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,32,1024,1,1,32768,1.9951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,4,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,8,0.1983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,16,0.1951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,32,0.1986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,64,0.1971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,128,0.1964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,256,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,512,0.2032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,1024,0.2066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,2048,0.2171
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,4096,0.2950
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,8192,0.2087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,16384,0.2254
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,32768,0.2431
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,65536,0.2608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1,1,1,131072,0.2975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,2,0.1973
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,8,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,16,0.1871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,4,0.1984
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,32,0.1931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,64,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,128,0.1984
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,256,0.2068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,512,0.2064
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,1024,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,2048,0.2026
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,4096,0.3004
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,8192,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,16384,0.2339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,32768,0.2380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,65536,0.2815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,2,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,2,1,1,131072,0.3093
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,4,0.2053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,8,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,64,0.2149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,32,0.2090
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,128,0.2002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,16,0.2130
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,256,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,512,0.1973
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,1024,0.2071
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,2048,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,4096,0.3099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,8192,0.2305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,16384,0.2443
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,32768,0.2513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,65536,0.2775
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,4,1,1,131072,0.3262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,2,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,4,0.2124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,8,0.2228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,16,0.2105
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,64,0.2190
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,32,0.2154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,128,0.2128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,256,0.2184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,512,0.2230
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,2048,0.2255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,1024,0.2213
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,4096,0.3233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,16384,0.2520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,8192,0.2409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,32768,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,65536,0.3008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,8,1,1,131072,0.3584
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,2,0.2253
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,4,0.2106
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,8,0.2198
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,16,0.2254
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,32,0.2163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,64,0.2231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,128,0.2255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,256,0.2256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,512,0.2254
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,1024,0.2268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,2048,0.2350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,4096,0.3287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,8192,0.2432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,16384,0.2599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,32768,0.2868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,65536,0.3188
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,4,0.2335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,2,0.2317
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,16,1,1,131072,0.3859
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,8,0.2274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,16,0.2350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,32,0.2280
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,64,0.2339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,128,0.2362
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,256,0.2359
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,512,0.2337
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,1024,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,2048,0.2400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,16384,0.2804
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,4096,0.3430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,8192,0.2552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,32768,0.3001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,65536,0.3493
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,32,1,1,131072,0.4363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,2,0.2460
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,4,0.2451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,8,0.2429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,16,0.2474
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,32,0.2518
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,64,0.2473
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,128,0.2481
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,256,0.2453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,512,0.2515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,1024,0.2564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,2048,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,4096,0.3625
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,8192,0.2781
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,16384,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,32768,0.3372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,131072,0.5336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,64,1,1,65536,0.4075
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,2,0.2891
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,4,0.2887
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,8,0.2883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,32,0.2896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,16,0.2867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,64,0.2901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,128,0.2909
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,256,0.2931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,512,0.2929
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,1024,0.3037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,2048,0.3154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,4096,0.4222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,16384,0.3795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,8192,0.3402
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,32768,0.4322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,65536,0.5407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,2,0.3747
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,128,1,1,131072,0.7546
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,4,0.3721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,8,0.3733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,16,0.3709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,32,0.3967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,64,0.3714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,128,0.3756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,256,0.3906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,512,0.3886
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,1024,0.4008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,2048,0.4227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,4096,0.6118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,8192,0.4641
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,32768,0.6342
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,16384,0.5366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,65536,0.8259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,2,0.5178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,4,0.5186
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,256,1,1,131072,1.2477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,8,0.5882
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,16,0.5178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,32,0.5182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,64,0.5209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,128,0.5213
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,256,0.5348
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,512,0.5436
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,1024,0.5680
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,2048,0.6155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,4096,0.9717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,8192,0.6977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,16384,0.8417
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,32768,1.0439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,2,0.8299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,512,1,1,65536,1.4281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,4,0.8333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,8,0.8321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,16,0.8365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,32,0.8424
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,64,0.8509
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,128,0.9728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,256,0.8704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,512,0.8889
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,1024,0.9379
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,2048,1.0437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,4096,1.6541
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,16384,1.4795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,8192,1.1930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,16,1024,1,1,32768,1.8627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,4,0.1931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,2,0.1773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,8,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,16,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,32,0.1749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,64,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,128,0.1881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,256,0.1868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,512,0.1892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,1024,0.1787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,2048,0.1940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,4096,0.2693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,8192,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,16384,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,32768,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,65536,0.2357
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1,1,1,131072,0.2948
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,2,0.1753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,4,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,8,0.1804
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,16,0.1898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,32,0.1851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,64,0.1859
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,128,0.1924
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,1024,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,2048,0.1862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,4096,0.2835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,512,0.1769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,8192,0.2076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,256,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,16384,0.2146
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,32768,0.2262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,131072,0.3149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,2,1,1,65536,0.2713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,2,0.1884
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,4,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,8,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,16,0.1974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,32,0.2033
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,64,0.1973
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,128,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,256,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,512,0.1867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,1024,0.1925
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,2048,0.1994
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,4096,0.2942
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,8192,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,16384,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,32768,0.2469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,65536,0.2852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,4,1,1,131072,0.3243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,2,0.1945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,4,0.1981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,32,0.2019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,16,0.2129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,8,0.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,64,0.2005
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,128,0.2051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,256,0.1973
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,512,0.1989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,1024,0.2086
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,2048,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,8192,0.2273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,4096,0.3063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,16384,0.2511
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,32768,0.2639
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,65536,0.2893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,8,1,1,131072,0.3466
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,2,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,4,0.2123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,8,0.2210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,16,0.2149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,32,0.2197
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,64,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,128,0.2096
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,512,0.2146
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,256,0.2124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,1024,0.2214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,2048,0.2310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,4096,0.3259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,8192,0.2424
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,16384,0.2663
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,32768,0.2737
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,65536,0.3141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,16,1,1,131072,0.3792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,2,0.2255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,8,0.2269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,16,0.2294
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,32,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,4,0.2277
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,64,0.2220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,128,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,256,0.2240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,512,0.2269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,1024,0.2360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,2048,0.2387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,4096,0.3417
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,8192,0.2575
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,16384,0.2713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,65536,0.3422
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,131072,0.4332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,32,1,1,32768,0.2964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,2,0.2414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,4,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,8,0.2399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,16,0.2400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,32,0.2386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,64,0.2371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,256,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,512,0.2432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,128,0.2401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,1024,0.2511
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,2048,0.2559
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,4096,0.3572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,8192,0.2756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,16384,0.2971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,32768,0.3286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,65536,0.3991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,64,1,1,131072,0.5279
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,2,0.2791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,4,0.2802
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,8,0.2881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,16,0.2812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,64,0.2760
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,128,0.2807
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,32,0.2780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,256,0.2850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,512,0.2961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,1024,0.2963
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,2048,0.3085
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,4096,0.4152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,8192,0.3305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,16384,0.3714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,32768,0.4231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,65536,0.5324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,2,0.3582
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,128,1,1,131072,0.7489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,4,0.3598
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,8,0.3604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,16,0.3561
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,32,0.3598
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,64,0.3571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,256,0.3682
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,512,0.3855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,128,0.3602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,1024,0.3861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,2048,0.4091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,4096,0.5946
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,8192,0.4529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,16384,0.5179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,32768,0.6219
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,65536,0.8190
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,2,0.4921
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,4,0.4934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,256,1,1,131072,1.2337
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,8,0.4906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,16,0.4931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,32,0.4927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,64,0.4939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,128,0.4974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,256,0.5049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,512,0.5176
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,1024,0.5395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,2048,0.5907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,4096,0.9504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,8192,0.6691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,16384,0.8168
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,32768,1.0137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,2,0.7796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,512,1,1,65536,1.3957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,4,0.8646
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,8,0.7745
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,16,0.7823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,32,0.7867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,64,0.7918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,128,0.7951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,256,0.8668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,2048,0.9944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,512,0.8392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,1024,0.8805
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,16384,1.4162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,4096,1.6007
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,2,0.1829
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,8192,1.1343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,8,1024,1,1,32768,1.8110
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,4,0.1706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,8,0.1793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,16,0.1802
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,32,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,128,0.1709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,64,0.1773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,256,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,512,0.1688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,1024,0.1647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,2048,0.1797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,8192,0.1922
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,16384,0.1994
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,4096,0.2732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,32768,0.2152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,65536,0.2390
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,4,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,2,0.1757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1,1,1,131072,0.2789
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,8,0.1907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,16,0.1821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,32,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,64,0.1911
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,128,0.1846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,256,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,512,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,2048,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,1024,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,8192,0.2000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,4096,0.2815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,16384,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,32768,0.2210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,65536,0.2533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,2,1,1,131072,0.3033
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,4,0.1865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,2,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,8,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,128,0.1882
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,64,0.1860
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,32,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,16,0.1886
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,256,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,512,0.1897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,1024,0.1957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,2048,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,4096,0.2945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,8192,0.2038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,16384,0.2191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,32768,0.2403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,65536,0.2716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,4,1,1,131072,0.3245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,2,0.1986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,4,0.1944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,8,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,16,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,32,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,64,0.1900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,128,0.1895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,256,0.2015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,512,0.1986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,1024,0.1966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,2048,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,4096,0.3021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,8192,0.2171
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,16384,0.2339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,32768,0.2643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,65536,0.2880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,8,1,1,131072,0.3518
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,2,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,4,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,16,0.2076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,8,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,32,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,64,0.2012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,128,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,256,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,512,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,1024,0.2061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,2048,0.2300
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,4096,0.3248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,8192,0.2391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,16384,0.2562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,32768,0.2731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,65536,0.3136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,16,1,1,131072,0.3845
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,2,0.2123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,4,0.2136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,8,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,16,0.2153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,32,0.2068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,128,0.2186
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,64,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,256,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,512,0.2133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,1024,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,2048,0.2395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,4096,0.3385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,16384,0.2655
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,32768,0.2935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,65536,0.3414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,8192,0.2468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,32,1,1,131072,0.4343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,2,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,4,0.2319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,8,0.2367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,16,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,32,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,64,0.2235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,128,0.2330
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,256,0.2387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,512,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,1024,0.2453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,2048,0.2525
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,4096,0.3522
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,8192,0.2723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,16384,0.2962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,32768,0.3269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,65536,0.3981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,64,1,1,131072,0.5271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,2,0.2708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,4,0.2752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,8,0.2674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,16,0.2718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,32,0.2753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,64,0.2725
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,128,0.2742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,256,0.2793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,512,0.2826
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,1024,0.2901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,2048,0.3026
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,4096,0.4053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,8192,0.3299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,16384,0.3649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,32768,0.4166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,65536,0.5295
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,128,1,1,131072,0.7395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,2,0.3509
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,4,0.3533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,8,0.3473
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,16,0.3513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,32,0.3513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,64,0.3808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,128,0.3542
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,256,0.3610
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,512,0.3657
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,1024,0.3771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,2048,0.4003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,4096,0.5958
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,16384,0.5147
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,8192,0.4445
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,32768,0.6121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,65536,0.8072
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,256,1,1,131072,1.2326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,2,0.4785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,4,0.4763
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,8,0.4789
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,16,0.4783
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,32,0.5561
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,64,0.4835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,128,0.4864
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,256,0.4951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,512,0.5049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,2048,0.5774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,1024,0.5237
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,4096,0.9267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,8192,0.6574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,16384,0.8021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,32768,1.0000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,2,0.7498
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,512,1,1,65536,1.3910
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,4,0.7502
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,8,0.7532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,32,0.7611
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,16,0.7532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,64,0.8247
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,128,0.7740
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,256,0.7870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,512,0.8130
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,2048,0.9692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,1024,0.8637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,4096,1.5715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,16384,1.3917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,8192,1.1128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,4,1024,1,1,32768,1.7777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,2,0.1691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,4,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,8,0.1647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,16,0.1610
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,32,0.1733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,64,0.1607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,128,0.1667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,256,0.1731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,512,0.1644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,1024,0.1713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,2048,0.1764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,4096,0.2828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,8192,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,16384,0.1936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,32768,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,65536,0.2467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1,1,1,131072,0.2733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,2,0.1880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,8,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,4,0.1773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,16,0.1731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,32,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,64,0.1709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,128,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,512,0.1788
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,1024,0.1808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,2048,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,256,0.1835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,4096,0.2719
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,8192,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,16384,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,32768,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,65536,0.2580
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,2,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,2,1,1,131072,0.3058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,8,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,4,0.1835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,16,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,32,0.1801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,64,0.1824
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,128,0.1846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,256,0.1875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,512,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,1024,0.1872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,2048,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,4096,0.2795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,8192,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,16384,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,32768,0.2409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,65536,0.2715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,4,1,1,131072,0.3227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,2,0.1942
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,4,0.1907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,8,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,16,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,32,0.1869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,64,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,128,0.1936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,256,0.1918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,512,0.1929
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,1024,0.1921
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,2048,0.1998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,4096,0.2914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,8192,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,16384,0.2382
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,32768,0.2627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,65536,0.2889
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,8,1,1,131072,0.3505
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,2,0.1973
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,4,0.1998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,32,0.2015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,16,0.1972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,8,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,64,0.2015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,128,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,256,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,512,0.2008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,1024,0.2074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,2048,0.2272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,8192,0.2287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,16384,0.2539
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,4096,0.3282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,32768,0.2738
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,65536,0.3090
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,2,0.2136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,16,1,1,131072,0.3787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,8,0.2092
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,4,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,16,0.1974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,32,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,64,0.2055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,128,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,256,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,1024,0.2300
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,512,0.2137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,2048,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,4096,0.3261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,8192,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,16384,0.2668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,32768,0.2965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,65536,0.3396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,32,1,1,131072,0.4311
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,2,0.2296
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,4,0.2193
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,8,0.2255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,16,0.2341
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,64,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,32,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,128,0.2318
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,256,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,512,0.2375
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,1024,0.2395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,4096,0.3534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,2048,0.2467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,8192,0.2686
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,16384,0.2971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,32768,0.3283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,65536,0.3954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,4,0.2697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,2,0.2709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,64,1,1,131072,0.5272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,8,0.2709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,16,0.2703
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,32,0.2612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,64,0.2696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,128,0.2735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,256,0.2780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,512,0.2780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,1024,0.2880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,2048,0.2962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,4096,0.4083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,8192,0.3283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,16384,0.3636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,32768,0.4187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,65536,0.5273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,2,0.3491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,128,1,1,131072,0.7392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,4,0.3450
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,8,0.3480
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,16,0.3485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,32,0.3477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,64,0.3477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,128,0.3495
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,256,0.3564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,512,0.3706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,2048,0.3977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,1024,0.3785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,4096,0.5858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,8192,0.4411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,16384,0.5145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,32768,0.6100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,65536,0.8058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,2,0.4707
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,4,0.4714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,8,0.4724
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,16,0.4708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,32,0.4708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,256,1,1,131072,1.2271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,64,0.4759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,128,0.4799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,256,0.4858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,512,0.4978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,2048,0.5674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,1024,0.5353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,4096,0.9192
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,16384,0.7945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,8192,0.6473
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,32768,0.9906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,512,1,1,65536,1.3847
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,2,0.7371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,4,0.7377
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,16,0.7419
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,8,0.7392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,32,0.7470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,64,0.7519
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,128,0.7607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,256,0.7759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,512,0.7983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,2048,0.9447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,16384,1.3824
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,4096,1.5542
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,1024,0.8389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,8192,1.0984
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,2,0.1705
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,2,1024,1,1,32768,1.7627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,4,0.1648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,8,0.1591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,16,0.1553
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,32,0.1587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,64,0.1628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,128,0.1650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,256,0.1699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,512,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,1024,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,2048,0.1623
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,4096,0.2530
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,8192,0.1789
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,16384,0.1938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,32768,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,65536,0.2293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1,1,1,131072,0.2795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,2,0.1782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,4,0.1728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,8,0.1717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,16,0.1749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,32,0.1710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,64,0.1785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,128,0.1809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,256,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,512,0.1770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,1024,0.1749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,2048,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,4096,0.2820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,8192,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,16384,0.2083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,32768,0.2148
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,65536,0.2460
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,2,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,2,1,1,131072,0.3034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,4,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,8,0.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,16,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,32,0.1834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,64,0.1782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,128,0.1796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,256,0.1765
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,512,0.1814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,2048,0.1962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,1024,0.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,4096,0.2736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,8192,0.2001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,16384,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,65536,0.2596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,131072,0.3191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,4,1,1,32768,0.2320
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,2,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,4,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,16,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,8,0.1838
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,32,0.1883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,64,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,128,0.1895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,256,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,1024,0.1927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,512,0.1902
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,2048,0.1888
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,4096,0.2917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,8192,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,16384,0.2266
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,32768,0.2538
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,65536,0.2893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,8,1,1,131072,0.3470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,2,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,4,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,8,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,16,0.1967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,32,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,64,0.2021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,128,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,256,0.2009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,1024,0.2129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,2048,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,512,0.1972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,4096,0.3217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,8192,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,16384,0.2555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,32768,0.2711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,65536,0.3070
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,16,1,1,131072,0.3713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,2,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,4,0.2102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,8,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,16,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,32,0.2063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,64,0.2082
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,128,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,256,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,2048,0.2341
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,512,0.2155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,1024,0.2218
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,4096,0.3321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,8192,0.2510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,16384,0.2643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,32768,0.2918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,65536,0.3406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,32,1,1,131072,0.4267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,4,0.2251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,8,0.2226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,16,0.2272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,2,0.2275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,32,0.2240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,64,0.2269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,128,0.2252
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,256,0.2329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,512,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,2048,0.2443
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,1024,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,4096,0.3484
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,16384,0.2912
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,8192,0.2627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,32768,0.3236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,65536,0.3956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,64,1,1,131072,0.5226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,2,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,4,0.2689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,8,0.2685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,16,0.2663
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,64,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,32,0.2663
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,128,0.2697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,256,0.2746
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,512,0.2812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,1024,0.2851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,2048,0.2975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,4096,0.4058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,16384,0.3636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,8192,0.3233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,32768,0.4116
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,65536,0.5238
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,4,0.3438
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,128,1,1,131072,0.7406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,2,0.3450
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,8,0.3457
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,16,0.3440
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,32,0.3440
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,64,0.3411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,128,0.3490
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,256,0.3531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,512,0.3557
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,1024,0.3720
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,2048,0.3942
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,4096,0.5828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,8192,0.4392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,16384,0.5045
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,32768,0.6060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,65536,0.8021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,2,0.5319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,4,0.4632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,16,0.4654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,32,0.4663
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,8,0.4662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,256,1,1,131072,1.2234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,64,0.4702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,128,0.4725
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,256,0.4810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,512,0.4940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,2048,0.5649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,4096,0.9155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,1024,0.5148
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,8192,0.6451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,16384,0.7903
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,32768,0.9839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,2,0.7315
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,4,0.7313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,512,1,1,65536,1.3743
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,8,0.7327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,16,0.8083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,32,0.7420
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,64,0.7491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,128,0.7468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,256,0.7670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,512,0.7904
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,2048,0.9371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,4096,1.5457
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,1024,0.8502
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,16384,1.3710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,8192,1.0989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,2,0.2258
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,4,0.2334
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,float16,1,1024,1,1,32768,1.7630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,8,0.2525
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,32,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,16,0.2277
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,64,0.2274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,128,0.2321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,256,0.2269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,512,0.2362
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,1024,0.2255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,4096,0.3195
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,8192,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,16384,0.2585
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,2048,0.2537
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,32768,0.2696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,65536,0.2879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1,1,1,131072,0.3406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,2,0.2516
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,4,0.2426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,8,0.2419
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,16,0.2437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,32,0.2378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,64,0.2456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,128,0.2390
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,256,0.2442
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,512,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,1024,0.2416
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,2048,0.2414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,8192,0.2492
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,4096,0.3366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,16384,0.2697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,32768,0.2778
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,65536,0.3090
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,2,1,1,131072,0.3441
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,2,0.2570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,4,0.2534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,8,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,32,0.2583
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,64,0.2549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,16,0.2562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,128,0.2558
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,256,0.2538
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,512,0.2552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,1024,0.2587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,2048,0.2543
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,4096,0.3477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,32768,0.2946
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,16384,0.2841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,8192,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,65536,0.3188
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,4,1,1,131072,0.3701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,2,0.2716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,4,0.2633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,8,0.2986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,16,0.2678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,32,0.2685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,64,0.2674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,128,0.2681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,256,0.2723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,512,0.2650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,1024,0.2658
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,2048,0.2709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,4096,0.3625
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,8192,0.2826
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,16384,0.2937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,32768,0.3113
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,131072,0.3902
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,8,1,1,65536,0.3416
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,2,0.2761
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,4,0.2798
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,8,0.2804
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,16,0.2805
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,32,0.2796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,64,0.2817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,256,0.2809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,128,0.2782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,512,0.2764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,1024,0.2836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,2048,0.2828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,4096,0.3799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,8192,0.2927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,65536,0.3656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,32768,0.3243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,16384,0.3140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,16,1,1,131072,0.4290
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,2,0.3044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,4,0.3036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,8,0.3065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,16,0.3065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,64,0.3053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,32,0.3021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,128,0.3059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,256,0.3027
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,512,0.3063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,1024,0.3061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,2048,0.3098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,4096,0.4072
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,8192,0.3247
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,32768,0.3671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,16384,0.3402
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,65536,0.4094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,32,1,1,131072,0.5019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,2,0.3623
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,4,0.3611
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,8,0.3635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,16,0.3619
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,32,0.3629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,64,0.3598
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,128,0.3600
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,256,0.3638
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,512,0.3635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,1024,0.3640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,2048,0.3681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,4096,0.4697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,8192,0.3888
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,16384,0.4088
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,32768,0.4407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,65536,0.5145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,64,1,1,131072,0.6404
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,2,0.4592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,4,0.4576
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,8,0.4582
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,16,0.4585
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,32,0.4535
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,64,0.4554
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,128,0.4604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,256,0.4598
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,512,0.4601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,1024,0.4622
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,2048,0.4657
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,4096,0.5701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,8192,0.4905
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,16384,0.5243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,32768,0.5822
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,65536,0.6914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,128,1,1,131072,0.9077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,2,0.6957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,4,0.6947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,16,0.6936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,8,0.6940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,32,0.6949
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,64,0.7004
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,128,0.7061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,256,0.7067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,512,0.7074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,2048,0.7203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,4096,0.9042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,1024,0.7082
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,8192,0.7660
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,16384,0.8331
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,32768,0.9265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,4,1.1692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,2,1.1686
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,8,1.1685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,65536,1.1189
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,256,1,1,131072,1.5498
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,16,1.1695
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,32,1.1748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,64,1.1770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,128,1.1843
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,256,1.1841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,512,1.1861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,1024,1.1954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,2048,1.2156
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,4096,1.5581
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,8192,1.3022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,16384,1.4325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,32768,1.6265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,2,2.0265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,4,2.0300
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,8,2.0341
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,32,2.0325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,64,2.0407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,512,1,1,65536,2.0124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,16,2.0338
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,128,2.0414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,256,2.0417
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,512,2.0534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,16384,2.5078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,2048,2.1033
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,4096,2.7062
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,1024,2.0707
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,8192,2.2676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,2,0.1855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,128,1024,1,1,32768,2.8972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,4,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,8,0.1846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,32,0.1967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,64,0.1879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,16,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,128,0.1879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,256,0.1843
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,512,0.1925
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,1024,0.1869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,2048,0.1963
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,4096,0.2734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,8192,0.2006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,16384,0.2146
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,32768,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,65536,0.2473
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1,1,1,131072,0.3036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,4,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,2,0.2014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,8,0.1968
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,16,0.2001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,32,0.2011
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,64,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,128,0.2003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,256,0.2052
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,512,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,1024,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,2048,0.1989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,8192,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,16384,0.2235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,32768,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,4096,0.2941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,65536,0.2610
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,2,1,1,131072,0.3080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,2,0.2094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,4,0.2043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,16,0.2125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,8,0.2093
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,64,0.2084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,32,0.2048
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,128,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,256,0.2084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,512,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,1024,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,2048,0.2130
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,4096,0.3067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,8192,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,16384,0.2335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,32768,0.2474
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,131072,0.3225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,4,1,1,65536,0.2753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,2,0.2185
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,4,0.2149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,8,0.2167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,16,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,32,0.2124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,64,0.2154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,128,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,256,0.2170
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,512,0.2158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,1024,0.2129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,2048,0.2179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,4096,0.3067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,8192,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,16384,0.2451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,32768,0.2563
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,65536,0.2861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,8,1,1,131072,0.3429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,2,0.2232
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,4,0.2259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,8,0.2257
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,16,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,32,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,128,0.2278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,64,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,256,0.2247
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,512,0.2235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,1024,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,2048,0.2272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,4096,0.3270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,8192,0.2364
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,16384,0.2567
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,65536,0.3115
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,32768,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,2,0.2393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,4,0.2397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,16,1,1,131072,0.3756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,8,0.2409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,16,0.2354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,32,0.2411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,64,0.2391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,128,0.2413
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,256,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,512,0.2392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,1024,0.2362
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,4096,0.3389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,2048,0.2423
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,8192,0.2573
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,16384,0.2753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,32768,0.2983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,65536,0.3449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,32,1,1,131072,0.4355
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,2,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,4,0.2709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,8,0.2661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,16,0.2705
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,32,0.2709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,64,0.2726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,128,0.2711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,256,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,512,0.2696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,1024,0.2736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,2048,0.2686
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,4096,0.3734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,8192,0.2931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,16384,0.3183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,32768,0.3496
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,65536,0.4190
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,64,1,1,131072,0.5455
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,2,0.3205
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,4,0.3213
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,8,0.3184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,16,0.3224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,32,0.3234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,64,0.3223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,128,0.3245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,256,0.3213
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,512,0.3211
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,1024,0.3246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,2048,0.3232
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,4096,0.4326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,8192,0.3588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,16384,0.3911
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,32768,0.4432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,65536,0.5541
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,2,0.4499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,128,1,1,131072,0.7659
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,4,0.4475
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,8,0.4503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,16,0.4501
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,32,0.4515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,64,0.4551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,128,0.4555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,256,0.4541
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,512,0.4499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,1024,0.4541
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,2048,0.4658
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,4096,0.6489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,8192,0.5147
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,16384,0.5740
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,32768,0.6712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,65536,0.8631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,2,0.6977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,4,0.7005
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,8,0.7000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,256,1,1,131072,1.2934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,16,0.6988
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,32,0.7034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,64,0.7086
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,128,0.7124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,256,0.7163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,512,0.7161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,2048,0.7391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,1024,0.7229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,4096,1.0805
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,8192,0.8291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,16384,0.9555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,32768,1.1505
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,512,1,1,65536,1.5323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,2,1.1726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,4,1.1737
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,8,1.1731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,16,1.1733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,32,1.1773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,64,1.1788
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,128,1.1836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,256,1.1857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,512,1.1927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,2048,1.2354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,1024,1.2082
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,16384,1.6546
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,4096,1.8263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,8192,1.4048
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,2,0.1758
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,64,1024,1,1,32768,2.0369
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,4,0.1733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,8,0.1727
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,16,0.1712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,64,0.1698
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,32,0.1731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,128,0.1713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,256,0.1752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,512,0.1692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,1024,0.1736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,2048,0.1731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,4096,0.2653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,8192,0.1802
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,32768,0.2151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,65536,0.2398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,16384,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1,1,1,131072,0.2774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,2,0.1868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,4,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,8,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,16,0.1883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,32,0.1989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,64,0.1863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,128,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,256,0.1848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,512,0.1883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,1024,0.1897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,2048,0.1878
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,4096,0.2867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,8192,0.1933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,16384,0.2126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,65536,0.2489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,131072,0.2939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,2,1,1,32768,0.2248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,2,0.1963
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,4,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,8,0.1964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,16,0.1945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,32,0.1912
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,128,0.1957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,64,0.1944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,256,0.1947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,512,0.1972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,1024,0.1953
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,2048,0.1967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,4096,0.2906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,8192,0.2038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,16384,0.2218
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,32768,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,65536,0.2594
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,4,1,1,131072,0.3129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,2,0.2003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,4,0.2026
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,8,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,16,0.2001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,32,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,64,0.2014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,128,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,256,0.2076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,512,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,1024,0.2026
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,2048,0.2073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,4096,0.2918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,8192,0.2168
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,16384,0.2266
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,32768,0.2423
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,65536,0.2772
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,8,1,1,131072,0.3316
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,2,0.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,4,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,8,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,16,0.2145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,32,0.2123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,64,0.2117
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,128,0.2143
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,256,0.2125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,512,0.2127
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,1024,0.2155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,2048,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,4096,0.3143
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,8192,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,32768,0.2613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,16384,0.2412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,65536,0.2979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,16,1,1,131072,0.3666
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,2,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,4,0.2210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,8,0.2273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,16,0.2250
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,32,0.2267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,64,0.2246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,128,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,256,0.2259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,512,0.2251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,1024,0.2276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,2048,0.2256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,4096,0.3274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,8192,0.2442
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,16384,0.2612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,32768,0.2823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,65536,0.3323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,32,1,1,131072,0.4236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,2,0.2530
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,4,0.2503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,8,0.2518
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,16,0.2526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,32,0.2532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,64,0.2541
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,128,0.2530
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,256,0.2530
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,512,0.2533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,1024,0.2534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,2048,0.2578
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,4096,0.3586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,8192,0.2748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,16384,0.2995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,32768,0.3318
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,65536,0.4015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,64,1,1,131072,0.5290
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,2,0.2964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,4,0.2998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,8,0.2986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,16,0.2989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,32,0.2976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,64,0.3006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,128,0.2999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,512,0.3031
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,256,0.2960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,1024,0.3045
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,2048,0.3055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,4096,0.4091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,8192,0.3319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,16384,0.3690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,32768,0.4209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,65536,0.5281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,2,0.4148
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,4,0.4163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,8,0.4162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,128,1,1,131072,0.7532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,16,0.4145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,32,0.4154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,64,0.4172
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,128,0.4154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,256,0.4179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,512,0.4203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,1024,0.4254
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,2048,0.4311
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,4096,0.6178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,16384,0.5376
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,8192,0.4768
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,32768,0.6321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,65536,0.8319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,2,0.6292
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,4,0.6284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,256,1,1,131072,1.2574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,8,0.6297
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,16,0.6299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,32,0.6304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,64,0.6354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,128,0.6385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,256,0.6391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,512,0.6401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,1024,0.6498
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,2048,0.6637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,4096,1.0072
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,8192,0.7515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,16384,0.8800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,2,1.0387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,32768,1.0727
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,4,1.0401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,512,1,1,65536,1.4637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,8,1.0427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,16,1.0427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,32,1.0470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,64,1.0550
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,128,1.0540
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,256,1.0569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,512,1.0685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,2048,1.1098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,1024,1.0761
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,4096,1.6957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,8192,1.2747
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,16384,1.5178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,2,0.1716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,4,0.1694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,8,0.1659
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,32,1024,1,1,32768,1.8923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,32,0.1675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,128,0.1711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,64,0.1688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,16,0.1762
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,256,0.1653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,512,0.1691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,1024,0.1618
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,2048,0.1696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,4096,0.2635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,8192,0.1774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,16384,0.1927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,32768,0.1998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,65536,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1,1,1,131072,0.2766
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,2,0.1860
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,4,0.1777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,16,0.1809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,8,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,32,0.1828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,64,0.1774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,128,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,256,0.1834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,512,0.1859
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,1024,0.1761
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,2048,0.1830
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,8192,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,4096,0.2757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,16384,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,32768,0.2219
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,65536,0.2500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,2,1,1,131072,0.2946
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,2,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,4,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,16,0.1841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,8,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,64,0.1911
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,32,0.1890
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,128,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,256,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,512,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,1024,0.1895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,2048,0.1864
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,4096,0.2835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,8192,0.2033
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,16384,0.2163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,32768,0.2306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,65536,0.2560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,4,1,1,131072,0.2976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,2,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,4,0.1928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,8,0.1948
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,32,0.2003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,64,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,16,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,128,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,256,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,512,0.1957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,1024,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,2048,0.1980
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,4096,0.2980
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,8192,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,16384,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,32768,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,65536,0.2695
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,8,1,1,131072,0.3225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,2,0.2051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,4,0.1993
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,8,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,16,0.2029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,32,0.2048
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,64,0.2023
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,128,0.2047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,256,0.2020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,512,0.2076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,1024,0.2055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,2048,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,4096,0.3048
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,8192,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,16384,0.2325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,32768,0.2512
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,65536,0.2931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,2,0.2117
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,16,1,1,131072,0.3589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,4,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,8,0.2144
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,16,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,32,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,64,0.2147
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,128,0.2170
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,256,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,1024,0.2163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,512,0.2172
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,2048,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,4096,0.3162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,8192,0.2305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,16384,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,32768,0.2765
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,65536,0.3218
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,32,1,1,131072,0.4124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,2,0.2412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,4,0.2422
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,8,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,32,0.2407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,64,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,16,0.2391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,128,0.2386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,512,0.2426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,1024,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,256,0.2412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,2048,0.2467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,4096,0.3485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,8192,0.2668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,16384,0.2863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,32768,0.3196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,65536,0.3913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,64,1,1,131072,0.5215
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,2,0.2817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,4,0.2820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,8,0.2804
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,16,0.2818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,32,0.2805
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,64,0.2779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,128,0.2831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,256,0.2820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,512,0.2835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,1024,0.2861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,2048,0.2901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,4096,0.3928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,8192,0.3162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,16384,0.3492
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,32768,0.4042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,65536,0.5120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,4,0.3881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,2,0.3874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,128,1,1,131072,0.7259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,8,0.3890
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,16,0.3861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,32,0.3893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,64,0.3857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,128,0.3894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,256,0.3907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,512,0.3918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,1024,0.3937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,2048,0.4047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,4096,0.5856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,8192,0.4489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,16384,0.5058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,32768,0.6097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,65536,0.7991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,2,0.5753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,256,1,1,131072,1.2244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,4,0.5765
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,8,0.5756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,16,0.5782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,32,0.5757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,64,0.5797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,128,0.5838
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,256,0.5825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,512,0.5852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,2048,0.6080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,4096,0.9539
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,1024,0.5947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,8192,0.6934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,16384,0.8231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,32768,1.0156
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,512,1,1,65536,1.4033
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,4,0.9229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,8,0.9219
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,16,0.9250
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,2,0.9240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,32,0.9269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,64,0.9336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,128,0.9329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,256,0.9349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,512,0.9449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,1024,0.9576
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,16384,1.4014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,2048,0.9880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,4096,1.5803
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,8192,1.1570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,2,0.1565
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,8,0.1568
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,16,1024,1,1,32768,1.7833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,4,0.1560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,16,0.1615
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,32,0.1578
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,64,0.1651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,128,0.1635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,256,0.1570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,1024,0.1660
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,512,0.1528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,4096,0.2544
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,2048,0.1608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,8192,0.1696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,16384,0.1865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,32768,0.1965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,65536,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1,1,1,131072,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,2,0.1669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,4,0.1645
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,8,0.1723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,16,0.1706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,32,0.1688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,64,0.1697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,128,0.1702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,256,0.1753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,512,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,1024,0.1648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,4096,0.2653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,2048,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,8192,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,16384,0.1967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,32768,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,65536,0.2368
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,2,1,1,131072,0.2794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,2,0.1709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,4,0.1803
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,8,0.1773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,16,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,32,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,128,0.1784
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,64,0.1822
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,256,0.1764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,512,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,1024,0.1758
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,2048,0.1828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,4096,0.2745
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,8192,0.1938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,16384,0.2065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,32768,0.2243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,65536,0.2443
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,4,1,1,131072,0.2988
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,2,0.1829
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,4,0.1864
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,8,0.1882
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,16,0.1880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,32,0.1851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,64,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,128,0.1856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,256,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,512,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,1024,0.1909
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,2048,0.1906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,4096,0.2886
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,8192,0.2032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,16384,0.2204
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,32768,0.2361
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,65536,0.2573
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,8,1,1,131072,0.3145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,2,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,4,0.1991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,8,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,16,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,64,0.2013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,32,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,128,0.1939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,256,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,512,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,1024,0.2031
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,2048,0.2063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,4096,0.2950
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,8192,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,16384,0.2331
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,32768,0.2436
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,65536,0.2833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,16,1,1,131072,0.3572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,2,0.2136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,4,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,32,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,16,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,8,0.2082
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,64,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,128,0.2069
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,256,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,512,0.2143
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,1024,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,2048,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,8192,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,16384,0.2463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,4096,0.3140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,32768,0.2696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,65536,0.3208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,32,1,1,131072,0.4142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,2,0.2347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,4,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,8,0.2350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,16,0.2333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,32,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,128,0.2359
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,64,0.2317
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,256,0.2375
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,512,0.2363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,1024,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,2048,0.2399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,4096,0.3391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,8192,0.2594
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,32768,0.3148
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,16384,0.2863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,65536,0.3811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,64,1,1,131072,0.5159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,2,0.2697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,4,0.2720
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,8,0.2728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,16,0.2742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,64,0.2742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,128,0.2719
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,32,0.2723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,256,0.2749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,512,0.2720
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,1024,0.2774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,2048,0.2814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,4096,0.3868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,8192,0.3112
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,16384,0.3423
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,32768,0.3952
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,65536,0.5051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,128,1,1,131072,0.7167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,2,0.3743
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,4,0.3749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,8,0.3755
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,16,0.3739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,64,0.3731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,32,0.3754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,128,0.3758
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,256,0.3748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,512,0.3790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,1024,0.3829
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,2048,0.3897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,4096,0.5738
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,8192,0.4335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,16384,0.4981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,32768,0.5927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,65536,0.7916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,256,1,1,131072,1.2169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,2,0.5513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,4,0.5496
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,8,0.5517
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,16,0.5510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,32,0.5526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,64,0.5531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,128,0.5578
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,256,0.5572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,512,0.5590
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,1024,0.5666
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,2048,0.5846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,4096,0.9290
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,16384,0.7947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,8192,0.6685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,32768,0.9915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,512,1,1,65536,1.3772
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,2,0.8667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,4,0.8650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,8,0.8686
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,16,0.8691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,64,0.8746
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,32,0.8723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,128,0.8757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,256,0.8779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,512,0.8832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,2048,0.9268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,1024,0.8969
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,8192,1.0922
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,16384,1.3412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,4096,1.5223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,2,0.1579
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,4,0.1486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,8,1024,1,1,32768,1.7211
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,8,0.1582
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,16,0.1483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,32,0.1524
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,128,0.1549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,256,0.1588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,64,0.1586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,512,0.1466
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,1024,0.1547
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,2048,0.1592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,4096,0.2443
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,8192,0.1718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,16384,0.1770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,65536,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,32768,0.1849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,2,0.1671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1,1,1,131072,0.2575
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,4,0.1712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,8,0.1675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,16,0.1668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,32,0.1627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,64,0.1668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,128,0.1596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,512,0.1672
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,256,0.1669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,1024,0.1673
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,2048,0.1628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,4096,0.2572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,16384,0.1855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,8192,0.1708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,32768,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,65536,0.2325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,2,1,1,131072,0.2788
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,2,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,4,0.1710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,8,0.1737
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,32,0.1755
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,16,0.1668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,64,0.1725
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,128,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,256,0.1733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,512,0.1769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,1024,0.1752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,2048,0.1768
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,4096,0.2694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,8192,0.1836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,16384,0.2038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,32768,0.2205
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,65536,0.2481
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,4,1,1,131072,0.2976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,2,0.1796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,4,0.1857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,8,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,16,0.1773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,32,0.1785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,128,0.1820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,64,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,256,0.1800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,1024,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,512,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,2048,0.1869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,4096,0.2752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,8192,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,16384,0.2161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,32768,0.2293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,65536,0.2624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,8,1,1,131072,0.3212
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,2,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,4,0.1936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,8,0.1881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,16,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,32,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,64,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,128,0.1959
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,256,0.1959
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,512,0.1940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,2048,0.1948
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,1024,0.1974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,4096,0.2898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,8192,0.2137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,16384,0.2329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,32768,0.2466
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,65536,0.2910
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,16,1,1,131072,0.3528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,2,0.1992
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,8,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,4,0.2052
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,32,0.2084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,16,0.2067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,64,0.2083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,128,0.2064
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,256,0.2088
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,512,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,1024,0.2069
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,2048,0.2126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,4096,0.3135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,8192,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,16384,0.2481
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,32768,0.2674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,65536,0.3219
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,32,1,1,131072,0.4041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,2,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,8,0.2316
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,4,0.2301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,16,0.2313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,32,0.2320
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,64,0.2313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,128,0.2306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,256,0.2284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,512,0.2311
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,1024,0.2350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,2048,0.2360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,4096,0.3367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,8192,0.2572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,16384,0.2812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,32768,0.3108
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,65536,0.3850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,64,1,1,131072,0.5152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,2,0.2634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,4,0.2672
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,8,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,16,0.2690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,32,0.2676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,128,0.2681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,256,0.2704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,64,0.2626
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,512,0.2690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,2048,0.2779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,1024,0.2726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,4096,0.3850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,8192,0.3070
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,16384,0.3388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,32768,0.3922
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,65536,0.4982
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,128,1,1,131072,0.7228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,2,0.3678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,4,0.3690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,8,0.3683
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,16,0.3677
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,32,0.3679
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,64,0.3683
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,128,0.3673
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,256,0.3681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,512,0.3698
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,2048,0.3842
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,1024,0.3756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,4096,0.5664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,8192,0.4306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,16384,0.4889
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,32768,0.5856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,65536,0.7819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,2,0.5377
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,4,0.5360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,8,0.5359
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,256,1,1,131072,1.2103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,16,0.5371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,32,0.5382
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,64,0.5377
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,128,0.5437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,256,0.5445
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,512,0.5482
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,1024,0.5527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,2048,0.5681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,4096,0.9126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,8192,0.6541
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,16384,0.7856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,32768,0.9757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,512,1,1,65536,1.3637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,2,0.8393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,4,0.8367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,16,0.8409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,32,0.8430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,8,0.8396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,64,0.8450
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,128,0.8457
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,256,0.8498
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,512,0.8592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,2048,0.8993
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,1024,0.8707
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,4096,1.4972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,16384,1.3168
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,8192,1.0734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,2,0.1489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,4,0.1467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,4,1024,1,1,32768,1.6934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,8,0.1485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,16,0.1442
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,32,0.1555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,128,0.1446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,64,0.1429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,256,0.1487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,512,0.1486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,1024,0.1489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,2048,0.1463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,4096,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,8192,0.1646
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,32768,0.1823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,16384,0.1693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,65536,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1,1,1,131072,0.2533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,2,0.1719
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,4,0.1628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,8,0.1608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,16,0.1627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,32,0.1642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,64,0.1549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,128,0.1554
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,256,0.1609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,512,0.1647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,1024,0.1630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,2048,0.1624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,8192,0.1788
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,16384,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,4096,0.2549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,32768,0.1918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,65536,0.2258
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,2,1,1,131072,0.2777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,2,0.1748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,4,0.1710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,8,0.1724
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,16,0.1648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,32,0.1689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,64,0.1641
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,128,0.1749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,256,0.1737
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,512,0.1685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,1024,0.1699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,2048,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,4096,0.2651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,8192,0.1842
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,16384,0.1951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,32768,0.2148
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,65536,0.2491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,4,1,1,131072,0.2956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,2,0.1770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,4,0.1801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,8,0.1768
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,16,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,32,0.1742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,64,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,128,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,256,0.1802
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,2048,0.1810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,4096,0.2732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,512,0.1787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,8192,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,32768,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,1024,0.1787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,16384,0.2110
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,65536,0.2591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,8,1,1,131072,0.3101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,2,0.1875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,4,0.1931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,8,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,16,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,32,0.1898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,64,0.1858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,128,0.1916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,256,0.1945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,512,0.1909
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,1024,0.1906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,2048,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,8192,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,4096,0.2964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,16384,0.2246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,32768,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,65536,0.2864
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,16,1,1,131072,0.3479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,2,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,4,0.2050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,8,0.1984
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,16,0.2001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,32,0.2031
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,64,0.2026
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,128,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,256,0.2033
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,512,0.2009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,2048,0.2065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,4096,0.3059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,8192,0.2257
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,1024,0.2085
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,16384,0.2434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,32768,0.2675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,65536,0.3167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,32,1,1,131072,0.4029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,2,0.2250
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,4,0.2305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,8,0.2293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,16,0.2277
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,64,0.2296
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,32,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,128,0.2300
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,256,0.2266
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,512,0.2281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,1024,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,2048,0.2350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,8192,0.2544
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,16384,0.2796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,4096,0.3350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,32768,0.3122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,65536,0.3813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,64,1,1,131072,0.5119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,4,0.2651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,2,0.2647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,8,0.2644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,16,0.2645
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,32,0.2655
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,128,0.2627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,64,0.2646
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,256,0.2640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,512,0.2708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,1024,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,2048,0.2757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,4096,0.3784
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,8192,0.2999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,16384,0.3360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,32768,0.3896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,65536,0.4935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,128,1,1,131072,0.7106
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,2,0.3629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,4,0.3629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,8,0.3629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,16,0.3614
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,32,0.3619
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,64,0.3632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,128,0.3643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,256,0.3659
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,512,0.3670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,2048,0.3806
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,1024,0.3708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,4096,0.5642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,8192,0.4236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,16384,0.4879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,32768,0.5834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,65536,0.7831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,2,0.5293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,4,0.5288
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,16,0.5286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,32,0.5305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,8,0.5288
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,256,1,1,131072,1.2009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,64,0.5318
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,128,0.5374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,256,0.5372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,512,0.5412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,2048,0.5612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,1024,0.5465
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,4096,0.9042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,8192,0.6489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,16384,0.7786
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,32768,0.9701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,4,0.8256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,512,1,1,65536,1.3562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,8,0.8235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,2,0.8257
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,32,0.8291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,16,0.8308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,64,0.8326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,128,0.8349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,256,0.8355
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,2048,0.8856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,512,0.8458
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,1024,0.8602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,4096,1.4849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,8192,1.0545
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,16384,1.3055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,2,1024,1,1,32768,1.6809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,2,0.1432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,4,0.1457
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,8,0.1555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,16,0.1426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,32,0.1467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,64,0.1549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,256,0.1410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,128,0.1413
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,512,0.1528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,1024,0.1546
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,2048,0.1423
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,4096,0.2469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,8192,0.1595
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,16384,0.1718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,32768,0.1847
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,65536,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1,1,1,131072,0.2548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,2,0.1644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,4,0.1584
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,8,0.1608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,16,0.1627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,32,0.1602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,64,0.1552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,128,0.1544
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,256,0.1582
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,512,0.1588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,1024,0.1593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,2048,0.1595
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,4096,0.2596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,8192,0.1665
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,16384,0.1907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,32768,0.1907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,65536,0.2226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,2,1,1,131072,0.2716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,4,0.1692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,8,0.1688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,2,0.1627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,16,0.1590
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,64,0.1610
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,32,0.1651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,128,0.1654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,256,0.1683
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,512,0.1691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,1024,0.1680
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,2048,0.1655
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,4096,0.2617
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,8192,0.1733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,16384,0.1933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,32768,0.2134
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,65536,0.2411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,4,1,1,131072,0.2894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,4,0.1725
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,8,0.1705
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,2,0.1770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,16,0.1721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,32,0.1782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,64,0.1735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,128,0.1766
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,256,0.1748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,512,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,1024,0.1753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,2048,0.1809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,4096,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,8192,0.1952
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,16384,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,32768,0.2238
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,65536,0.2573
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,8,1,1,131072,0.3102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,2,0.1821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,4,0.1907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,8,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,16,0.1928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,32,0.1851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,64,0.1877
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,128,0.1863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,256,0.1871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,512,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,1024,0.1839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,2048,0.1969
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,4096,0.2993
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,8192,0.2076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,16384,0.2235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,32768,0.2391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,65536,0.2763
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,16,1,1,131072,0.3477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,2,0.1949
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,4,0.2048
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,8,0.2020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,16,0.1971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,32,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,64,0.1964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,128,0.1958
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,256,0.1921
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,512,0.2033
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,1024,0.2066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,2048,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,4096,0.3051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,8192,0.2228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,16384,0.2386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,32768,0.2595
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,65536,0.3103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,32,1,1,131072,0.4053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,2,0.2237
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,8,0.2244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,16,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,4,0.2246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,32,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,64,0.2200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,128,0.2266
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,256,0.2228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,512,0.2280
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,1024,0.2281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,2048,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,4096,0.3326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,8192,0.2495
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,16384,0.2723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,32768,0.3108
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,65536,0.3772
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,64,1,1,131072,0.5060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,2,0.2608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,4,0.2611
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,16,0.2609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,8,0.2617
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,32,0.2563
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,64,0.2613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,128,0.2622
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,256,0.2632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,512,0.2677
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,1024,0.2677
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,2048,0.2725
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,4096,0.3777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,8192,0.2966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,16384,0.3350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,32768,0.3874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,65536,0.4932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,128,1,1,131072,0.7156
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,2,0.3588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,4,0.3587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,8,0.3589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,16,0.3574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,32,0.3601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,64,0.3606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,128,0.3583
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,256,0.3639
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,512,0.3634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,1024,0.3668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,2048,0.3782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,4096,0.5661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,8192,0.4203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,32768,0.5784
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,16384,0.4842
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,65536,0.7723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,2,0.5240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,4,0.5232
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,256,1,1,131072,1.2039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,8,0.5234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,16,0.5251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,32,0.5223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,128,0.5317
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,64,0.5268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,256,0.5305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,2048,0.5570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,512,0.5343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,1024,0.5415
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,4096,0.8976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,8192,0.6407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,16384,0.7732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,32768,0.9670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,2,0.8158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,4,0.8178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,16,0.8220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,512,1,1,65536,1.3474
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,32,0.8218
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,8,0.8175
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,64,0.8256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,128,0.8279
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,256,0.8276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,2048,0.8819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,512,0.8367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,1024,0.8552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,4096,1.4730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,8192,1.0451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,16384,1.2954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,float16,1,1024,1,1,32768,1.6853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,2,0.2547
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,4,0.2503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,8,0.2555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,32,0.2495
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,16,0.2711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,128,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,64,0.2596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,512,0.2564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,256,0.2589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,1024,0.2517
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,2048,0.2608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,4096,0.2650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,8192,0.2834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,32768,0.3258
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,16384,0.2939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,65536,0.3785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1,1,1,131072,0.4939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,2,0.2599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,4,0.2606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,8,0.2570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,16,0.2653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,32,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,64,0.2643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,128,0.2631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,256,0.2657
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,512,0.2600
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,2048,0.2574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,4096,0.2857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,16384,0.3013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,8192,0.2927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,1024,0.2668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,32768,0.3242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,65536,0.3933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,2,1,1,131072,0.4923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,2,0.2627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,4,0.2703
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,16,0.2711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,32,0.2705
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,64,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,8,0.2705
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,128,0.2716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,256,0.2675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,512,0.2622
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,1024,0.2720
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,2048,0.2739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,4096,0.4034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,8192,0.2964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,16384,0.3074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,32768,0.3350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,65536,0.3922
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,4,1,1,131072,0.5002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,2,0.2758
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,4,0.2768
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,8,0.2756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,16,0.2742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,32,0.2740
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,128,0.2706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,64,0.2771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,256,0.2697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,512,0.2755
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,1024,0.2741
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,2048,0.2778
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,4096,0.4087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,8192,0.3055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,16384,0.3135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,32768,0.3473
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,65536,0.4021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,8,1,1,131072,0.5340
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,2,0.2866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,4,0.2856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,8,0.2837
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,16,0.2860
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,64,0.2808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,32,0.2817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,128,0.2865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,256,0.2870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,512,0.2851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,2048,0.2943
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,4096,0.4263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,1024,0.2852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,8192,0.3125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,16384,0.3243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,32768,0.3624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,65536,0.4347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,16,1,1,131072,0.5691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,2,0.2997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,4,0.3001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,8,0.2965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,16,0.2933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,32,0.2957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,64,0.2983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,128,0.2968
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,256,0.2977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,1024,0.3054
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,512,0.3019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,2048,0.3064
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,4096,0.4358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,8192,0.3305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,16384,0.3522
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,32768,0.3871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,65536,0.4714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,32,1,1,131072,0.6272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,2,0.3245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,4,0.3207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,8,0.3214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,16,0.3229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,32,0.3232
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,64,0.3238
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,128,0.3233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,256,0.3329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,512,0.3282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,1024,0.3289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,2048,0.3301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,4096,0.4678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,8192,0.3690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,16384,0.3893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,32768,0.4356
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,65536,0.5418
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,64,1,1,131072,0.7365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,2,0.4039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,4,0.4021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,8,0.4087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,16,0.4062
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,32,0.4046
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,64,0.4037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,128,0.4061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,512,0.4080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,256,0.4199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,1024,0.4127
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,2048,0.4257
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,4096,0.5733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,8192,0.4722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,16384,0.5020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,32768,0.5701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,2,0.5598
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,65536,0.7141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,4,0.5602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,128,1,1,131072,0.9945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,8,0.5621
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,16,0.5614
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,64,0.5673
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,32,0.5635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,128,0.5647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,256,0.5756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,512,0.5871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,1024,0.5997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,2048,0.6255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,4096,0.8785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,16384,0.7438
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,8192,0.6858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,32768,0.8551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,65536,1.0950
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,2,0.8434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,4,0.8453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,256,1,1,131072,1.6493
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,8,0.8461
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,16,0.8806
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,32,0.8470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,64,0.8462
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,128,0.8529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,256,0.8636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,512,0.8803
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,2048,0.9515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,1024,0.9003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,4096,1.3380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,16384,1.1895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,8192,1.0646
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,32768,1.4226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,512,1,1,65536,1.8881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,2,1.4580
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,4,1.4563
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,8,1.5260
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,16,1.4623
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,32,1.4630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,64,1.4711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,128,1.4799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,256,1.4984
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,2048,1.6714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,512,1.5225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,4096,2.2000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,1024,1.5684
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,32768,2.5684
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,8192,1.8855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,128,1024,1,1,16384,2.1169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,2,0.2398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,4,0.2411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,8,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,16,0.2364
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,32,0.2402
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,64,0.2287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,128,0.2354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,256,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,512,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,1024,0.2435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,2048,0.2398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,4096,0.3704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,8192,0.2697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,16384,0.2677
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,32768,0.3094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,65536,0.3564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1,1,1,131072,0.4699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,2,0.2459
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,4,0.2477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,8,0.2458
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,16,0.2480
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,32,0.2363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,64,0.2470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,128,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,512,0.2491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,256,0.2449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,1024,0.2462
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,2048,0.2486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,4096,0.2628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,8192,0.2748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,16384,0.2817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,32768,0.3111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,65536,0.3636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,2,1,1,131072,0.4766
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,2,0.2535
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,4,0.2505
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,8,0.2461
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,16,0.2544
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,32,0.2517
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,64,0.2534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,128,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,256,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,512,0.2494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,1024,0.2526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,2048,0.2483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,4096,0.2721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,8192,0.2807
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,16384,0.2894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,32768,0.3112
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,65536,0.3793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,4,1,1,131072,0.4866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,4,0.2593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,2,0.2618
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,8,0.2599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,16,0.2609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,32,0.2593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,64,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,128,0.2586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,256,0.2581
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,512,0.2592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,1024,0.2602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,2048,0.2653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,4096,0.3983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,8192,0.2903
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,16384,0.2972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,32768,0.3276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,65536,0.3936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,8,1,1,131072,0.5184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,4,0.2649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,2,0.2676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,8,0.2675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,16,0.2626
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,32,0.2641
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,64,0.2652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,128,0.2708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,256,0.2612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,512,0.2693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,1024,0.2687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,2048,0.2774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,4096,0.4012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,8192,0.2998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,16384,0.3139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,32768,0.3379
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,65536,0.4140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,16,1,1,131072,0.5514
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,2,0.2761
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,4,0.2750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,8,0.2758
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,16,0.2736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,32,0.2756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,64,0.2714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,128,0.2736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,256,0.2728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,512,0.2756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,1024,0.2858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,2048,0.2895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,4096,0.4151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,16384,0.3243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,8192,0.3141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,32768,0.3624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,65536,0.4474
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,32,1,1,131072,0.6051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,4,0.2898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,8,0.2949
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,16,0.2928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,2,0.2961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,32,0.2947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,64,0.2928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,256,0.2941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,128,0.2934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,512,0.2965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,2048,0.3129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,4096,0.4439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,1024,0.2986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,8192,0.3411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,16384,0.3596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,32768,0.4095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,65536,0.5139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,64,1,1,131072,0.7025
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,2,0.3591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,4,0.3590
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,8,0.3550
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,16,0.3572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,32,0.3571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,64,0.3545
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,128,0.3563
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,256,0.3569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,512,0.3665
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,1024,0.3721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,2048,0.3789
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,4096,0.5225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,8192,0.4233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,16384,0.4554
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,32768,0.5233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,65536,0.6647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,128,1,1,131072,0.9507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,2,0.4802
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,4,0.4791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,8,0.4813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,16,0.4794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,32,0.4801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,64,0.5119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,128,0.4813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,256,0.4898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,512,0.4966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,1024,0.5074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,2048,0.5350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,4096,0.7887
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,8192,0.5950
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,16384,0.6496
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,32768,0.7734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,65536,1.0041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,2,0.6999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,256,1,1,131072,1.5684
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,4,0.7046
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,8,0.7013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,16,0.7809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,32,0.7098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,64,0.7153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,128,0.7209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,256,0.7297
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,512,0.7444
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,1024,0.7663
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,2048,0.8148
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,4096,1.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,16384,1.0495
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,8192,0.9306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,32768,1.2802
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,2,1.1963
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,512,1,1,65536,1.7453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,4,1.1949
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,16,1.2061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,32,1.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,64,1.2131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,8,1.1967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,128,1.2167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,256,1.2371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,512,1.2612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,1024,1.3035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,2048,1.4084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,4096,1.9484
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,2,0.2130
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,8192,1.6267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,16384,1.8654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,4,0.2014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,8,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,64,1024,1,1,32768,2.3135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,16,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,32,0.2151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,64,0.2270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,128,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,256,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,512,0.2168
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,1024,0.2173
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,2048,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,4096,0.3549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,8192,0.2482
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,16384,0.2515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,32768,0.2753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,65536,0.3486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,2,0.2348
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1,1,1,131072,0.4469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,4,0.2274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,8,0.2212
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,16,0.2209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,32,0.2341
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,64,0.2256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,128,0.2233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,256,0.2275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,512,0.2249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,1024,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,2048,0.2244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,4096,0.2520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,8192,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,16384,0.2560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,32768,0.3007
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,65536,0.3605
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,2,1,1,131072,0.4609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,2,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,4,0.2296
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,8,0.2248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,16,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,32,0.2358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,64,0.2354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,128,0.2314
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,256,0.2295
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,512,0.2236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,4096,0.3679
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,1024,0.2376
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,8192,0.2596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,16384,0.2754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,2048,0.2372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,32768,0.3053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,65536,0.3646
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,4,1,1,131072,0.4896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,2,0.2362
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,4,0.2437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,8,0.2342
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,16,0.2379
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,32,0.2343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,64,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,128,0.2378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,256,0.2460
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,512,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,1024,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,2048,0.2524
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,4096,0.3830
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,8192,0.2752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,16384,0.2928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,32768,0.3122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,65536,0.3892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,8,1,1,131072,0.5057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,2,0.2389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,4,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,8,0.2459
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,16,0.2441
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,64,0.2422
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,32,0.2469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,128,0.2472
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,256,0.2398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,512,0.2442
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,1024,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,8192,0.2899
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,4096,0.3929
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,16384,0.2968
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,32768,0.3344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,2048,0.2656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,65536,0.4036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,16,1,1,131072,0.5341
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,2,0.2521
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,4,0.2501
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,8,0.2555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,16,0.2567
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,32,0.2535
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,64,0.2538
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,128,0.2484
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,256,0.2555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,512,0.2642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,1024,0.2716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,2048,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,4096,0.4041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,8192,0.3027
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,16384,0.3117
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,32768,0.3483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,65536,0.4365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,32,1,1,131072,0.5962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,2,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,4,0.2781
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,8,0.2708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,16,0.2714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,32,0.2730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,64,0.2709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,128,0.2753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,256,0.2781
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,512,0.2818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,1024,0.2830
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,2048,0.2883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,4096,0.4239
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,8192,0.3215
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,16384,0.3416
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,32768,0.3894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,65536,0.4972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,64,1,1,131072,0.6915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,2,0.3314
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,4,0.3233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,8,0.3260
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,16,0.3279
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,32,0.3224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,64,0.3268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,128,0.3299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,256,0.3348
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,512,0.3395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,1024,0.3358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,2048,0.3527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,4096,0.4913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,8192,0.3925
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,16384,0.4270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,32768,0.4923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,65536,0.6394
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,128,1,1,131072,0.9183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,2,0.4241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,4,0.4234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,16,0.4245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,32,0.4233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,64,0.4251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,128,0.4290
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,8,0.4288
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,256,0.4315
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,512,0.4372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,1024,0.4515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,2048,0.4771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,4096,0.7324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,16384,0.5962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,8192,0.5400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,32768,0.7152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,65536,0.9474
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,2,0.5952
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,4,0.5944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,8,0.5961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,256,1,1,131072,1.5109
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,16,0.5932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,32,0.5961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,64,0.5995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,128,0.6378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,256,0.6191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,2048,0.7027
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,4096,1.0848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,512,0.6246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,8192,0.8138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,16384,0.9372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,1024,0.6483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,32768,1.1773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,512,1,1,65536,1.6363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,2,0.9749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,8,0.9806
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,4,0.9773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,16,0.9893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,32,0.9965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,64,0.9991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,128,1.0029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,256,1.0256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,512,1.0467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,2048,1.1927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,1024,1.0873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,4096,1.7428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,8192,1.4088
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,2,0.2087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,16384,1.6504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,32,1024,1,1,32768,2.0935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,4,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,8,0.2015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,16,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,32,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,64,0.2113
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,128,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,256,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,512,0.2023
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,1024,0.2155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,2048,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,4096,0.2225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,8192,0.2262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,16384,0.2407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,32768,0.2533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,65536,0.3290
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1,1,1,131072,0.4336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,2,0.2133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,4,0.2063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,8,0.2251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,16,0.2135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,32,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,64,0.2150
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,128,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,512,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,1024,0.2074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,256,0.2044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,2048,0.2158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,4096,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,8192,0.2257
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,16384,0.2400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,32768,0.2768
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,65536,0.3385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,2,1,1,131072,0.4536
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,2,0.2123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,4,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,8,0.2144
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,16,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,32,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,64,0.2191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,128,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,256,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,512,0.2135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,2048,0.2146
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,1024,0.2113
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,4096,0.3515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,8192,0.2433
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,16384,0.2586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,32768,0.2899
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,65536,0.3594
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,4,1,1,131072,0.4746
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,2,0.2199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,4,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,8,0.2266
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,16,0.2265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,32,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,64,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,128,0.2281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,256,0.2257
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,512,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,1024,0.2193
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,2048,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,4096,0.3620
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,16384,0.2713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,8192,0.2626
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,32768,0.3148
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,65536,0.3762
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,8,1,1,131072,0.5009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,2,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,4,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,8,0.2325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,16,0.2314
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,32,0.2328
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,64,0.2326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,128,0.2274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,256,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,512,0.2299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,1024,0.2363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,2048,0.2485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,4096,0.3818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,8192,0.2857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,16384,0.2970
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,32768,0.3227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,65536,0.3984
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,16,1,1,131072,0.5288
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,2,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,4,0.2389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,8,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,16,0.2400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,32,0.2411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,64,0.2357
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,128,0.2363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,256,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,512,0.2477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,1024,0.2612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,2048,0.2668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,4096,0.4008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,8192,0.2935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,16384,0.3063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,32768,0.3446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,65536,0.4340
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,32,1,1,131072,0.5825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,4,0.2587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,8,0.2570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,2,0.2506
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,32,0.2588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,16,0.2488
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,64,0.2519
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,128,0.2580
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,256,0.2586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,512,0.2707
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,1024,0.2720
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,2048,0.2805
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,4096,0.4147
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,8192,0.3138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,16384,0.3328
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,32768,0.3819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,65536,0.4829
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,2,0.3066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,64,1,1,131072,0.6787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,4,0.3111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,8,0.3106
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,16,0.3115
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,32,0.3039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,64,0.3186
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,128,0.3078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,256,0.3124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,512,0.3201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,1024,0.3279
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,2048,0.3397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,4096,0.4790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,8192,0.3799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,16384,0.4099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,32768,0.4746
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,65536,0.6189
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,128,1,1,131072,0.9000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,2,0.3990
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,4,0.4095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,8,0.3983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,16,0.3989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,32,0.3977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,64,0.3977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,128,0.3991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,256,0.4541
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,512,0.4159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,1024,0.4257
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,2048,0.4507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,8192,0.5114
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,4096,0.7049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,16384,0.5664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,32768,0.6857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,65536,0.9249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,2,0.5751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,256,1,1,131072,1.4826
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,4,0.5463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,8,0.5448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,16,0.5445
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,32,0.5438
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,64,0.5470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,128,0.5743
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,256,0.5616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,512,0.5752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,1024,0.5991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,2048,0.6468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,4096,1.0339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,8192,0.7598
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,16384,0.8849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,32768,1.1172
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,512,1,1,65536,1.5802
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,4,0.8666
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,2,0.8670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,16,0.8740
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,32,0.8748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,8,0.9946
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,64,0.8837
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,128,0.8837
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,256,0.9836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,512,0.9321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,2048,1.0822
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,1024,0.9724
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,4096,1.6267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,8192,1.2885
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,2,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,16384,1.5353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,16,1024,1,1,32768,1.9885
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,4,0.2035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,8,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,16,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,32,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,64,0.2044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,128,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,256,0.2048
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,512,0.1941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,1024,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,2048,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,8192,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,16384,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,32768,0.2586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,4096,0.3331
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,65536,0.3214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1,1,1,131072,0.4275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,2,0.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,4,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,16,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,32,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,8,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,64,0.2081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,128,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,256,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,512,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,1024,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,2048,0.2024
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,8192,0.2265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,16384,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,4096,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,32768,0.2652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,65536,0.3255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,2,1,1,131072,0.4477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,2,0.2151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,4,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,8,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,16,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,32,0.2094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,64,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,128,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,256,0.2081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,512,0.2163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,1024,0.2107
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,2048,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,4096,0.2301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,8192,0.2361
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,16384,0.2446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,65536,0.3426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,131072,0.4685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,4,1,1,32768,0.2818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,2,0.2165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,4,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,16,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,8,0.2208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,64,0.2199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,32,0.2166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,128,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,256,0.2243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,512,0.2184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,1024,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,2048,0.2205
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,4096,0.3524
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,8192,0.2463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,16384,0.2684
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,32768,0.3041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,65536,0.3793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,8,1,1,131072,0.4923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,2,0.2305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,4,0.2232
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,8,0.2295
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,16,0.2272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,32,0.2281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,64,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,128,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,256,0.2240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,512,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,1024,0.2264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,2048,0.2412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,4096,0.3696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,8192,0.2763
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,16384,0.2943
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,32768,0.3265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,65536,0.3932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,16,1,1,131072,0.5336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,2,0.2307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,4,0.2350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,8,0.2337
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,16,0.2320
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,64,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,32,0.2343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,128,0.2368
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,256,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,512,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,1024,0.2515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,4096,0.3984
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,8192,0.2906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,16384,0.3087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,32768,0.3441
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,2048,0.2651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,65536,0.4246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,32,1,1,131072,0.5781
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,2,0.2449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,4,0.2453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,8,0.2456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,16,0.2419
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,32,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,64,0.2418
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,128,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,256,0.2552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,512,0.2651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,1024,0.2682
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,2048,0.2770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,4096,0.4096
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,8192,0.3104
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,32768,0.3749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,16384,0.3272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,65536,0.4771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,64,1,1,131072,0.6772
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,2,0.3003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,4,0.2877
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,8,0.2972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,16,0.2904
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,32,0.2895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,128,0.2962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,64,0.2913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,256,0.3076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,512,0.3118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,1024,0.3196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,2048,0.3301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,16384,0.3976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,4096,0.4653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,8192,0.3739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,32768,0.4694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,65536,0.6153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,2,0.3848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,4,0.3824
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,128,1,1,131072,0.8949
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,32,0.3841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,64,0.3862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,8,0.3850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,16,0.3834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,128,0.3894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,256,0.4067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,512,0.4031
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,1024,0.4129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,2048,0.4373
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,4096,0.6895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,16384,0.5542
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,8192,0.4991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,32768,0.6712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,65536,0.9015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,4,0.5199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,8,0.5216
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,16,0.5207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,32,0.5230
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,256,1,1,131072,1.4700
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,64,0.5238
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,2,0.5223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,128,0.5804
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,256,0.5378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,2048,0.6264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,1024,0.5735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,4096,1.0066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,512,0.5512
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,16384,0.8583
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,8192,0.7374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,32768,1.0932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,512,1,1,65536,1.5552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,2,0.8152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,4,0.8149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,16,0.8202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,32,0.8265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,8,0.8165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,64,0.8330
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,128,0.9427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,256,0.8543
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,512,0.8804
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,2048,1.0286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,4096,1.5736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,1024,0.9382
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,8192,1.2403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,16384,1.4813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,8,1024,1,1,32768,1.9423
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,2,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,4,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,32,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,16,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,64,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,8,0.1974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,128,0.1954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,256,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,512,0.1949
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,1024,0.2020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,2048,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,4096,0.2106
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,8192,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,16384,0.2265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,32768,0.2497
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,65536,0.3136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1,1,1,131072,0.4197
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,2,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,4,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,8,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,16,0.2001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,32,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,64,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,128,0.1938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,256,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,512,0.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,1024,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,2048,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,4096,0.2166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,8192,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,16384,0.2384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,32768,0.2662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,65536,0.3321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,2,1,1,131072,0.4370
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,2,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,4,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,8,0.2022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,16,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,32,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,64,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,128,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,256,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,512,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,1024,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,2048,0.2042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,4096,0.3491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,8192,0.2331
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,16384,0.2476
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,32768,0.2695
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,65536,0.3523
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,4,1,1,131072,0.4670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,4,0.2228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,2,0.2215
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,8,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,16,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,32,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,128,0.2197
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,64,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,256,0.2200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,512,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,1024,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,2048,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,4096,0.3496
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,8192,0.2542
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,16384,0.2573
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,65536,0.3770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,32768,0.2998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,8,1,1,131072,0.5041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,2,0.2200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,4,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,8,0.2200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,16,0.2262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,64,0.2252
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,128,0.2234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,32,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,256,0.2291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,512,0.2204
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,1024,0.2293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,2048,0.2415
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,4096,0.3763
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,8192,0.2701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,16384,0.2821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,32768,0.3218
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,65536,0.4001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,16,1,1,131072,0.5340
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,2,0.2285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,4,0.2301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,8,0.2326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,16,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,32,0.2319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,128,0.2358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,64,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,256,0.2328
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,512,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,1024,0.2438
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,2048,0.2647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,4096,0.3935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,8192,0.2852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,16384,0.3019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,32768,0.3430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,131072,0.5822
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,32,1,1,65536,0.4260
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,2,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,4,0.2503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,8,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,16,0.2366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,64,0.2468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,128,0.2430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,256,0.2491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,32,0.2403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,512,0.2571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,1024,0.2699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,2048,0.2694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,4096,0.4076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,8192,0.3056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,16384,0.3280
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,32768,0.3735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,65536,0.4771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,2,0.3012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,64,1,1,131072,0.6713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,4,0.2923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,8,0.2964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,16,0.3009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,32,0.2956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,64,0.2890
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,128,0.2923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,256,0.3026
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,512,0.3159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,1024,0.3166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,2048,0.3262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,4096,0.4651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,8192,0.3696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,16384,0.4012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,32768,0.4667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,65536,0.6106
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,128,1,1,131072,0.8939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,2,0.3727
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,4,0.3771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,8,0.4069
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,16,0.3924
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,64,0.3775
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,32,0.3789
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,128,0.3797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,256,0.3993
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,512,0.3915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,1024,0.4067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,2048,0.4315
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,4096,0.6857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,16384,0.5479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,8192,0.4925
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,32768,0.6670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,65536,0.8997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,2,0.5083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,4,0.5107
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,256,1,1,131072,1.4627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,8,0.5468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,16,0.5084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,32,0.5103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,64,0.5150
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,128,0.5719
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,256,0.5261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,512,0.5687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,1024,0.5602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,2048,0.6131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,16384,0.8437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,4096,0.9968
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,8192,0.7229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,32768,1.0815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,2,0.7935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,512,1,1,65536,1.5464
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,4,0.7942
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,8,0.8644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,16,0.7971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,32,0.8041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,64,0.8113
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,128,0.9201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,256,0.8523
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,512,0.8528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,2048,1.0145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,1024,0.8986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,16384,1.4539
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,4096,1.5120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,2,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,8192,1.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,4,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,4,1024,1,1,32768,1.9158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,8,0.1981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,16,0.1890
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,32,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,128,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,64,0.1918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,256,0.1939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,512,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,1024,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,2048,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,4096,0.3180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,8192,0.2194
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,16384,0.2258
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,32768,0.2510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,65536,0.3124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1,1,1,131072,0.4259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,2,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,8,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,4,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,16,0.2020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,64,0.2020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,256,0.2046
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,32,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,512,0.1912
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,128,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,1024,0.1979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,2048,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,4096,0.3349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,8192,0.2166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,16384,0.2348
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,32768,0.2494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,131072,0.4351
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,2,1,1,65536,0.3189
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,4,0.2146
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,2,0.2043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,8,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,16,0.2062
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,32,0.2019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,64,0.2103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,128,0.2124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,256,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,512,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,1024,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,2048,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,4096,0.3366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,8192,0.2225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,16384,0.2429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,32768,0.2729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,65536,0.3393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,4,1,1,131072,0.4690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,2,0.2137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,4,0.2207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,8,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,16,0.2102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,32,0.2191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,64,0.2234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,128,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,256,0.2186
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,512,0.2209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,1024,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,2048,0.2123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,4096,0.3471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,16384,0.2628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,8192,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,32768,0.2951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,65536,0.3754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,8,1,1,131072,0.4997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,2,0.2228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,4,0.2185
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,8,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,16,0.2261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,32,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,64,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,128,0.2240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,512,0.2274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,256,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,2048,0.2363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,4096,0.3612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,1024,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,8192,0.2659
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,16384,0.2874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,32768,0.3202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,65536,0.3967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,16,1,1,131072,0.5201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,2,0.2337
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,4,0.2328
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,16,0.2280
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,8,0.2301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,32,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,64,0.2262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,128,0.2313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,512,0.2268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,256,0.2327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,1024,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,4096,0.3916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,2048,0.2597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,8192,0.2891
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,16384,0.3018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,32768,0.3348
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,65536,0.4244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,32,1,1,131072,0.5809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,2,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,4,0.2399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,8,0.2366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,16,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,32,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,64,0.2431
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,128,0.2411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,256,0.2480
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,512,0.2391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,1024,0.2668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,2048,0.2744
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,8192,0.3017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,4096,0.4081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,16384,0.3277
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,32768,0.3731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,65536,0.4804
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,64,1,1,131072,0.6663
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,2,0.2794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,4,0.2943
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,8,0.2797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,16,0.2882
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,32,0.2920
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,64,0.2942
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,128,0.2823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,256,0.2932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,512,0.3064
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,1024,0.3088
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,4096,0.4658
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,2048,0.3262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,8192,0.3663
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,16384,0.3985
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,32768,0.4606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,65536,0.6111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,128,1,1,131072,0.8906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,2,0.3971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,4,0.3742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,8,0.3744
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,16,0.3724
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,32,0.3737
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,64,0.3728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,256,0.3892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,128,0.3797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,512,0.3880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,2048,0.4267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,4096,0.6808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,1024,0.4117
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,8192,0.4907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,16384,0.5430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,32768,0.6609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,65536,0.9009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,2,0.5038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,256,1,1,131072,1.4602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,4,0.5014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,8,0.5017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,16,0.5018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,32,0.5032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,64,0.5091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,256,0.5223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,128,0.5631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,512,0.5336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,1024,0.5528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,2048,0.6022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,4096,1.0955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,8192,0.7163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,16384,0.8387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,32768,1.0781
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,2,0.7816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,8,0.7815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,512,1,1,65536,1.5412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,4,0.7834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,16,0.7828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,64,0.7986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,32,0.7913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,128,0.9077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,256,0.8202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,512,0.8413
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,2048,0.9890
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,1024,0.8853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,4096,1.5466
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,8192,1.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,16384,1.4456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,2,1024,1,1,32768,1.9001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,2,0.1933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,4,0.1932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,8,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,32,0.1965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,16,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,512,0.1924
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,64,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,128,0.1925
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,2048,0.1863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,1024,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,256,0.1885
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,4096,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,8192,0.2166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,16384,0.2220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,32768,0.2451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,65536,0.3113
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1,1,1,131072,0.4083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,2,0.1969
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,8,0.2022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,4,0.2042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,16,0.1950
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,32,0.1957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,64,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,128,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,256,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,512,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,1024,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,4096,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,2048,0.2041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,8192,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,16384,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,32768,0.2534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,65536,0.3245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,2,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,2,1,1,131072,0.4193
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,4,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,8,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,16,0.2071
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,32,0.2009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,64,0.2086
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,128,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,256,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,512,0.2000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,1024,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,2048,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,4096,0.3347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,8192,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,16384,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,32768,0.2666
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,65536,0.3265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,2,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,4,1,1,131072,0.4702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,4,0.2161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,8,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,16,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,32,0.2165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,64,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,128,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,256,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,512,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,1024,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,2048,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,8192,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,16384,0.2484
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,4096,0.3449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,32768,0.2787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,65536,0.3644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,8,1,1,131072,0.4957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,2,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,8,0.2231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,16,0.2243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,4,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,32,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,64,0.2158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,128,0.2153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,256,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,512,0.2207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,1024,0.2247
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,2048,0.2325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,4096,0.3614
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,8192,0.2515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,16384,0.2773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,32768,0.3132
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,65536,0.3920
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,16,1,1,131072,0.5271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,4,0.2247
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,8,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,16,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,32,0.2223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,64,0.2220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,2,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,128,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,256,0.2287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,512,0.2272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,1024,0.2400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,2048,0.2532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,4096,0.3819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,8192,0.2798
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,16384,0.2979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,32768,0.3363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,65536,0.4231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,2,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,32,1,1,131072,0.5701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,4,0.2360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,8,0.2330
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,16,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,32,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,64,0.2386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,128,0.2343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,512,0.2445
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,256,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,1024,0.2649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,2048,0.2672
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,4096,0.3996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,16384,0.3214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,8192,0.3046
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,32768,0.3687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,65536,0.4691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,64,1,1,131072,0.6677
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,2,0.2888
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,4,0.2864
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,8,0.2875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,16,0.2880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,32,0.2880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,64,0.2825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,128,0.2719
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,512,0.3055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,256,0.2947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,1024,0.3104
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,2048,0.3223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,4096,0.4562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,8192,0.3626
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,16384,0.3925
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,32768,0.4564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,4,0.3705
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,131072,0.8875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,2,0.3684
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,128,1,1,65536,0.6079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,8,0.3683
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,32,0.3708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,16,0.3710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,64,0.3690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,128,0.3772
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,256,0.3823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,512,0.3869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,2048,0.4205
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,1024,0.4083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,4096,0.6765
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,8192,0.4861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,16384,0.5386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,32768,0.6604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,65536,0.8976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,256,1,1,131072,1.4531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,2,0.4971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,4,0.4977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,8,0.4987
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,16,0.4992
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,32,0.4950
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,64,0.5047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,128,0.5089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,256,0.5151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,2048,0.6002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,512,0.5282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,4096,1.0940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,1024,0.5637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,8192,0.7109
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,16384,0.8323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,32768,1.0722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,512,1,1,65536,1.5338
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,2,0.7722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,4,0.7732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,8,0.7750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,16,0.7734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,64,0.7918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,32,0.7900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,128,0.7968
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,256,0.8112
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,2048,0.9809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,512,0.8332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,4096,1.5339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,8192,1.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,1024,0.8780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,16384,1.4450
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,2,0.2545
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,4,0.2330
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,8,0.2382
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,fp8_block,1,1024,1,1,32768,1.8870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,16,0.2317
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,32,0.2455
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,64,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,256,0.2371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,128,0.2381
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,512,0.2391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,1024,0.2336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,2048,0.2333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,4096,0.2555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,8192,0.2677
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,16384,0.2699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,32768,0.2948
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,65536,0.3631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1,1,1,131072,0.4715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,4,0.2412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,8,0.2649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,2,0.2386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,16,0.2512
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,32,0.2485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,64,0.2446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,128,0.2483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,256,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,512,0.2512
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,4096,0.2633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,2048,0.2403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,1024,0.2379
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,8192,0.2714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,16384,0.2794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,32768,0.3065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,65536,0.3722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,2,1,1,131072,0.4814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,2,0.2521
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,4,0.2593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,8,0.2538
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,16,0.2589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,32,0.2573
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,64,0.2587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,128,0.2637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,256,0.2593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,512,0.2534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,1024,0.2599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,2048,0.2548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,4096,0.3881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,8192,0.2828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,16384,0.2945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,32768,0.3225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,65536,0.3848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,4,1,1,131072,0.4893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,2,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,4,0.2643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,8,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,16,0.2688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,32,0.2705
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,64,0.2733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,128,0.2703
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,256,0.2618
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,512,0.2677
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,1024,0.2641
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,2048,0.2709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,4096,0.3998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,8192,0.2964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,16384,0.3070
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,32768,0.3328
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,65536,0.3911
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,8,1,1,131072,0.5227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,2,0.2766
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,4,0.2815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,8,0.2797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,16,0.2857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,32,0.2800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,64,0.2815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,128,0.2758
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,256,0.2804
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,512,0.2753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,1024,0.2845
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,2048,0.2839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,4096,0.4089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,8192,0.3074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,16384,0.3187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,32768,0.3462
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,65536,0.4226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,2,0.3013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,16,1,1,131072,0.5616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,4,0.3050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,8,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,16,0.3063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,32,0.3061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,64,0.3002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,128,0.3044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,256,0.3073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,512,0.3000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,1024,0.3065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,2048,0.3068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,4096,0.4398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,8192,0.3337
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,16384,0.3461
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,32768,0.3861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,65536,0.4745
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,32,1,1,131072,0.6199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,2,0.3574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,4,0.3568
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,8,0.3595
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,16,0.3565
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,32,0.3559
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,64,0.3573
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,128,0.3599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,256,0.3561
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,512,0.3597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,1024,0.3577
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,2048,0.3595
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,8192,0.3957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,4096,0.4967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,16384,0.4131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,32768,0.4644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,65536,0.5647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,2,0.4643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,64,1,1,131072,0.7591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,4,0.4632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,8,0.4642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,16,0.4648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,32,0.4636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,64,0.4654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,128,0.4590
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,256,0.4591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,512,0.4641
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,1024,0.4643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,2048,0.4701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,4096,0.6105
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,16384,0.5421
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,8192,0.5099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,32768,0.6045
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,65536,0.7512
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,2,0.7109
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,128,1,1,131072,1.0369
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,4,0.7117
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,8,0.7178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,32,0.7160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,16,0.7162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,64,0.7172
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,128,0.7201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,256,0.7263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,1024,0.7322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,2048,0.7439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,4096,0.9951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,8192,0.8007
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,16384,0.8555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,512,0.7298
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,32768,0.9706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,65536,1.2050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,4,1.1646
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,2,1.1641
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,8,1.1623
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,16,1.1656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,32,1.1661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,256,1,1,131072,1.7717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,64,1.1694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,128,1.1766
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,256,1.1809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,512,1.1839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,2048,1.2073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,1024,1.1900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,4096,1.5868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,8192,1.3075
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,16384,1.4328
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,32768,1.6757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,512,1,1,65536,2.1374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,2,1.9937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,4,1.9946
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,8,1.9928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,16,1.9966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,32,1.9982
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,64,2.0025
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,128,2.0027
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,256,2.0092
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,512,2.0197
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,1024,2.0327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,2048,2.0697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,4096,2.5960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,8192,2.2558
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,16384,2.4922
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,128,1024,1,1,32768,2.9618
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,2,0.2114
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,4,0.2060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,8,0.1998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,16,0.2043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,32,0.2096
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,64,0.2075
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,128,0.2015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,256,0.2107
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,512,0.2024
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,1024,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,2048,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,4096,0.2261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,8192,0.2298
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,16384,0.2422
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,32768,0.2602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,65536,0.3329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1,1,1,131072,0.4343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,2,0.2153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,4,0.2084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,8,0.2158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,16,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,32,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,64,0.2128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,128,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,256,0.2147
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,512,0.2172
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,1024,0.2091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,2048,0.2096
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,4096,0.3460
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,8192,0.2371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,16384,0.2472
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,32768,0.2799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,65536,0.3358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,2,1,1,131072,0.4489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,2,0.2177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,4,0.2166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,8,0.2227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,16,0.2199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,32,0.2270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,64,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,128,0.2200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,256,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,512,0.2174
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,1024,0.2188
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,2048,0.2229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,4096,0.3510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,8192,0.2478
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,16384,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,32768,0.2820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,65536,0.3453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,4,1,1,131072,0.4521
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,2,0.2251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,4,0.2281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,16,0.2272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,8,0.2310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,32,0.2285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,64,0.2281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,128,0.2296
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,256,0.2253
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,512,0.2254
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,1024,0.2279
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,2048,0.2332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,4096,0.3565
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,8192,0.2533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,32768,0.2918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,16384,0.2645
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,65536,0.3573
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,8,1,1,131072,0.4753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,2,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,4,0.2380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,8,0.2402
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,16,0.2401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,32,0.2402
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,64,0.2360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,128,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,256,0.2336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,1024,0.2397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,512,0.2429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,2048,0.2422
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,4096,0.3729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,8192,0.2635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,16384,0.2743
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,32768,0.3023
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,65536,0.3779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,16,1,1,131072,0.5213
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,2,0.2497
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,4,0.2514
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,8,0.2505
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,16,0.2525
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,32,0.2495
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,64,0.2557
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,128,0.2476
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,512,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,1024,0.2542
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,256,0.2469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,2048,0.2586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,8192,0.2808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,16384,0.2960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,32768,0.3287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,4096,0.3797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,65536,0.4165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,32,1,1,131072,0.5726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,2,0.2773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,4,0.2799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,8,0.2779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,16,0.2799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,32,0.2794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,64,0.2787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,256,0.2811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,512,0.2777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,128,0.2755
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,1024,0.2845
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,2048,0.2835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,4096,0.4169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,8192,0.3128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,16384,0.3357
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,32768,0.3840
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,65536,0.4886
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,64,1,1,131072,0.6833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,2,0.3353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,8,0.3360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,16,0.3344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,32,0.3346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,4,0.3339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,64,0.3361
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,128,0.3317
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,256,0.3308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,512,0.3390
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,1024,0.3393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,2048,0.3394
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,4096,0.4731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,8192,0.3799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,32768,0.4745
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,65536,0.6249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,16384,0.4104
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,128,1,1,131072,0.9082
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,2,0.4731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,8,0.4685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,4,0.4705
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,16,0.4731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,32,0.4715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,64,0.4732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,128,0.4732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,256,0.4752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,512,0.4763
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,1024,0.4770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,2048,0.4876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,4096,0.7381
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,8192,0.5445
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,32768,0.7215
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,16384,0.6029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,65536,0.9544
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,256,1,1,131072,1.5258
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,4,0.7136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,8,0.7115
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,16,0.7147
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,32,0.7167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,64,0.7228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,128,0.7271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,2,0.7083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,256,0.7304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,512,0.7302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,2048,0.7542
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,4096,1.1312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,1024,0.7400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,8192,0.8568
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,16384,0.9783
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,32768,1.2172
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,512,1,1,65536,1.6814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,2,1.1732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4,1.1743
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8,1.1780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16,1.1774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32,1.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,64,1.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,128,1.1808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,256,1.1895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,512,1.1966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,2048,1.2401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,1024,1.2091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4096,1.7702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16384,1.6696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8192,1.4302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,2,0.1949
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32768,2.1438
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,4,0.1938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,8,0.2000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,16,0.1970
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,32,0.1908
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,64,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,128,0.1972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,256,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,512,0.1950
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,1024,0.1927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,2048,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,4096,0.2136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,8192,0.2130
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,16384,0.2230
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,32768,0.2562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,65536,0.3208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1,1,1,131072,0.4251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,2,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,4,0.2085
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,8,0.2081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,16,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,32,0.1936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,64,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,128,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,256,0.2102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,512,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,1024,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,2048,0.2067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,4096,0.3268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,8192,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,16384,0.2420
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,32768,0.2693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,65536,0.3276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,2,1,1,131072,0.4384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,2,0.2170
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,4,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,8,0.2179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,16,0.2085
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,32,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,64,0.2192
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,128,0.2157
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,256,0.2135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,512,0.2156
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,2048,0.2169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,1024,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,4096,0.2272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,8192,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,16384,0.2543
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,32768,0.2754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,65536,0.3408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,4,1,1,131072,0.4541
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,2,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,4,0.2237
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,8,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,16,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,32,0.2258
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,64,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,128,0.2188
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,512,0.2229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,1024,0.2213
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,256,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,2048,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,4096,0.3522
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,8192,0.2492
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,16384,0.2574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,32768,0.2831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,65536,0.3546
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,8,1,1,131072,0.4787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,2,0.2271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,4,0.2268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,8,0.2307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,16,0.2330
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,32,0.2335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,64,0.2246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,128,0.2308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,256,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,512,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,2048,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,4096,0.3649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,1024,0.2353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,8192,0.2591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,16384,0.2651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,32768,0.3032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,65536,0.3765
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,16,1,1,131072,0.5102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,2,0.2366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,8,0.2449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,4,0.2400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,16,0.2447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,32,0.2394
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,64,0.2425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,256,0.2408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,128,0.2447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,512,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,1024,0.2412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,2048,0.2450
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,4096,0.3780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,8192,0.2681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,16384,0.2894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,32768,0.3252
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,65536,0.4121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,32,1,1,131072,0.5626
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,2,0.2629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,4,0.2674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,8,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,16,0.2656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,32,0.2691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,64,0.2667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,128,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,256,0.2667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,512,0.2660
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,1024,0.2693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,2048,0.2749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,4096,0.4032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,8192,0.3044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,16384,0.3231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,32768,0.3711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,65536,0.4765
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,2,0.3131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,64,1,1,131072,0.6771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,4,0.3163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,8,0.3187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,16,0.3167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,32,0.3179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,64,0.3149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,128,0.3159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,2048,0.3245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,1024,0.3224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,256,0.3179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,512,0.3202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,4096,0.4625
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,8192,0.3638
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,16384,0.3908
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,65536,0.6077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,32768,0.4587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,2,0.4355
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,4,0.4375
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,128,1,1,131072,0.8909
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,8,0.4389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,16,0.4379
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,32,0.4351
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,64,0.4389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,128,0.4393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,256,0.4390
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,2048,0.4547
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,512,0.4451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,1024,0.4437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,4096,0.7063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,8192,0.5090
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,16384,0.5689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,32768,0.6900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,65536,0.9235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,4,0.6508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,2,0.6493
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,256,1,1,131072,1.4871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,8,0.6520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,16,0.6498
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,32,0.6540
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,128,0.6616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,64,0.6581
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,256,0.6616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,512,0.6664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,1024,0.6711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,2048,0.6875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,4096,1.0633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,8192,0.7921
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,16384,0.9149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,32768,1.1527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,2,1.0617
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4,1.0610
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16,1.0638
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,512,1,1,65536,1.6125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8,1.0659
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32,1.0764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,64,1.0769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,128,1.0784
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,256,1.0807
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,512,1.0869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,2048,1.1307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4096,1.6629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,1024,1.1003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16384,1.5666
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8192,1.3298
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,2,0.1891
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,4,0.1872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32768,2.0251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,8,0.1926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,32,0.1849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,16,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,64,0.1945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,128,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,256,0.1872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,512,0.1857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,1024,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,2048,0.1939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,8192,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,4096,0.3228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,16384,0.2131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,32768,0.2462
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,65536,0.3088
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1,1,1,131072,0.4231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,2,0.1928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,4,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,8,0.1967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,16,0.1982
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,32,0.1909
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,64,0.2019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,128,0.1967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,256,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,512,0.1985
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,1024,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,2048,0.1958
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,4096,0.3267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,8192,0.2135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,16384,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,32768,0.2529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,65536,0.3224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,2,1,1,131072,0.4339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,2,0.1961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,4,0.2038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,8,0.2046
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,16,0.2011
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,32,0.2045
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,64,0.2032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,128,0.2074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,256,0.2092
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,512,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,1024,0.2022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,4096,0.3368
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,2048,0.2049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,8192,0.2328
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,16384,0.2419
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,32768,0.2682
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,65536,0.3360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,2,0.2076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,4,0.2125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,4,1,1,131072,0.4437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,8,0.2110
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,16,0.2133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,32,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,64,0.2137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,128,0.2152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,256,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,512,0.2156
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,1024,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,2048,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,4096,0.3453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,8192,0.2398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,16384,0.2516
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,32768,0.2795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,65536,0.3416
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,8,1,1,131072,0.4642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,2,0.2228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,4,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,8,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,16,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,32,0.2259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,64,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,128,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,256,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,512,0.2240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,1024,0.2238
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,2048,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,4096,0.3609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,8192,0.2509
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,16384,0.2665
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,32768,0.2923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,65536,0.3641
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,16,1,1,131072,0.5060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,2,0.2326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,4,0.2376
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,8,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,16,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,32,0.2354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,64,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,128,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,256,0.2343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,512,0.2387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,1024,0.2372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,2048,0.2411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,4096,0.3686
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,8192,0.2679
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,16384,0.2770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,32768,0.3158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,65536,0.4041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,32,1,1,131072,0.5649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,4,0.2577
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,8,0.2555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,2,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,16,0.2601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,32,0.2539
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,64,0.2539
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,128,0.2571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,256,0.2593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,1024,0.2612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,2048,0.2638
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,512,0.2611
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,4096,0.3928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,16384,0.3139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,32768,0.3636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,8192,0.2971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,65536,0.4717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,64,1,1,131072,0.6661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,4,0.3006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,2,0.2997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,8,0.2962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,16,0.2962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,32,0.3003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,64,0.3017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,128,0.3000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,256,0.3019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,512,0.3065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,1024,0.3060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,2048,0.3064
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,4096,0.4437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,8192,0.3504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,16384,0.3793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,32768,0.4479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,65536,0.5926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,4,0.4086
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,2,0.4116
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,128,1,1,131072,0.8740
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,8,0.4111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,16,0.4126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,32,0.4131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,64,0.4137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,128,0.4134
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,256,0.4126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,512,0.4166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,1024,0.4220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,2048,0.4289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,8192,0.4869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,16384,0.5479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,4096,0.6796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,32768,0.6634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,65536,0.9010
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,2,0.6031
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,4,0.6001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,256,1,1,131072,1.4626
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,16,0.6025
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,32,0.6026
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,8,0.6008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,64,0.6049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,128,0.6070
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,256,0.6136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,512,0.6132
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,2048,0.6354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,4096,1.0162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,1024,0.6199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,16384,0.8619
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,8192,0.7390
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,32768,1.0971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,512,1,1,65536,1.5698
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,2,0.9554
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4,0.9554
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8,0.9543
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16,0.9575
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32,0.9580
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,64,0.9622
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,128,0.9620
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,256,0.9699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,512,0.9760
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,2048,1.0208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,1024,0.9928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4096,1.5457
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,2,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8192,1.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16384,1.4642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32768,1.9256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,4,0.1770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,8,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,16,0.1863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,32,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,64,0.1849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,128,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,256,0.1778
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,512,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,1024,0.1769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,2048,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,4096,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,8192,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,32768,0.2407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,16384,0.2067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,65536,0.2952
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,2,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,4,0.1900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1,1,1,131072,0.4080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,8,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,16,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,32,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,64,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,128,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,256,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,512,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,1024,0.1907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,2048,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,4096,0.3245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,8192,0.2072
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,16384,0.2264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,32768,0.2497
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,65536,0.3080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,2,1,1,131072,0.4320
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,4,0.2002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,2,0.1972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,8,0.2019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,16,0.1957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,32,0.1982
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,64,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,128,0.2047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,256,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,512,0.2002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,1024,0.1991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,2048,0.2038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,4096,0.3311
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,8192,0.2204
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,32768,0.2630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,16384,0.2399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,65536,0.3205
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,4,1,1,131072,0.4418
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,2,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,4,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,8,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,16,0.2073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,32,0.2151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,64,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,128,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,256,0.2060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,512,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,1024,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,2048,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,4096,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,8192,0.2399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,16384,0.2414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,32768,0.2751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,65536,0.3456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,8,1,1,131072,0.4650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,2,0.2199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,4,0.2134
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,8,0.2212
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,16,0.2218
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,32,0.2112
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,64,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,128,0.2191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,256,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,512,0.2213
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,1024,0.2149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,4096,0.3571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,2048,0.2223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,8192,0.2426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,16384,0.2624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,32768,0.2877
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,65536,0.3684
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,16,1,1,131072,0.5012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,2,0.2294
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,4,0.2325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,8,0.2284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,16,0.2246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,32,0.2258
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,64,0.2301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,128,0.2285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,256,0.2259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,512,0.2341
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,1024,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,2048,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,4096,0.3629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,8192,0.2597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,16384,0.2795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,32768,0.3142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,65536,0.4057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,32,1,1,131072,0.5531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,2,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,4,0.2505
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,8,0.2476
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,32,0.2534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,16,0.2468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,64,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,128,0.2513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,256,0.2504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,512,0.2547
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,1024,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,2048,0.2558
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,4096,0.3941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,16384,0.3129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,32768,0.3589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,65536,0.4623
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,8192,0.2887
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,64,1,1,131072,0.6634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,2,0.2914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,4,0.2876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,8,0.2900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,16,0.2907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,32,0.2903
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,64,0.2897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,128,0.2879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,256,0.2935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,512,0.2980
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,1024,0.2960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,2048,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,8192,0.3422
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,4096,0.4412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,32768,0.4367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,16384,0.3724
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,65536,0.5860
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,128,1,1,131072,0.8642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,2,0.3958
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,4,0.3985
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,8,0.3983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,16,0.3978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,32,0.3988
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,64,0.3994
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,128,0.4001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,256,0.4029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,512,0.4016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,1024,0.4087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,2048,0.4162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,4096,0.6677
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,8192,0.4733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,32768,0.6509
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,65536,0.8926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,2,0.5756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,16384,0.5312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,256,1,1,131072,1.4532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,4,0.5794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,8,0.5773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,16,0.5785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,32,0.5783
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,64,0.5807
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,128,0.5862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,256,0.5878
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,512,0.5923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,1024,0.5995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,2048,0.6116
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,4096,0.9912
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,16384,0.8410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,8192,0.7168
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,32768,1.0760
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,2,0.9052
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4,0.9032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,512,1,1,65536,1.5421
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8,0.9034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16,0.9077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32,0.9068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,64,0.9101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,128,0.9134
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,256,0.9174
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,512,0.9197
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,1024,0.9392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,2048,0.9639
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4096,1.5053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8192,1.1637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16384,1.4055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,2,0.1785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,4,0.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,8,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32768,1.8615
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,16,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,32,0.1848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,64,0.1710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,128,0.1758
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,256,0.1815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,512,0.1794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,1024,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,2048,0.1808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,4096,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,8192,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,16384,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,32768,0.2307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,65536,0.2998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1,1,1,131072,0.4080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,2,0.1849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,4,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,8,0.1926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,16,0.1875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,32,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,64,0.1803
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,128,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,512,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,256,0.1821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,2048,0.1932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,4096,0.2060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,1024,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,8192,0.2022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,16384,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,32768,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,65536,0.3085
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,2,1,1,131072,0.4129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,2,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,8,0.1947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,16,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,4,0.2013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,32,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,64,0.1951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,128,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,256,0.1918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,1024,0.2010
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,512,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,2048,0.1938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,4096,0.3308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,8192,0.2197
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,16384,0.2326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,32768,0.2601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,65536,0.3230
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,4,1,1,131072,0.4388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,2,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,4,0.2067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,8,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,16,0.2015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,32,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,64,0.2092
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,128,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,256,0.2021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,512,0.2051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,1024,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,2048,0.2071
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,4096,0.3327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,8192,0.2311
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,16384,0.2437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,32768,0.2699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,65536,0.3407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,4,0.2177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,2,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,8,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,16,0.2192
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,8,1,1,131072,0.4662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,32,0.2158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,64,0.2125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,128,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,512,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,1024,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,256,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,2048,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,4096,0.3480
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,8192,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,16384,0.2557
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,32768,0.2842
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,65536,0.3687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,2,0.2218
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,4,0.2266
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,16,1,1,131072,0.5013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,8,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,16,0.2254
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,32,0.2252
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,64,0.2208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,128,0.2284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,256,0.2225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,512,0.2287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,1024,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,2048,0.2330
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,4096,0.3656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,8192,0.2593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,16384,0.2717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,32768,0.3160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,65536,0.3957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,32,1,1,131072,0.5558
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,2,0.2470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,4,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,16,0.2483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,32,0.2429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,8,0.2467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,64,0.2489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,128,0.2463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,256,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,512,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,2048,0.2591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,4096,0.3858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,1024,0.2548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,8192,0.2883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,16384,0.3121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,32768,0.3568
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,65536,0.4653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,64,1,1,131072,0.6589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,2,0.2819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,4,0.2855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,8,0.2869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,16,0.2860
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,32,0.2863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,64,0.2820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,128,0.2876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,256,0.2841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,512,0.2933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,1024,0.2965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,2048,0.2989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,4096,0.4372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,8192,0.3384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,16384,0.3664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,32768,0.4400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,65536,0.5806
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,2,0.3916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,128,1,1,131072,0.8662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,4,0.3909
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,8,0.3919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,16,0.3926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,32,0.3904
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,64,0.3944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,128,0.3911
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,256,0.3966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,512,0.3963
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,2048,0.4113
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,1024,0.4000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,4096,0.6598
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,16384,0.5251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,8192,0.4666
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,65536,0.8818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,32768,0.6446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,256,1,1,131072,1.4424
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,4,0.5656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,2,0.5639
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,8,0.5672
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,16,0.5646
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,32,0.5651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,64,0.5702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,128,0.5721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,256,0.5746
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,512,0.5779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,2048,0.6018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,1024,0.5860
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,4096,1.0824
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,8192,0.7038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,16384,0.8267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,32768,1.0664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,512,1,1,65536,1.5310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,2,0.8750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,4,0.8803
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,8,0.8786
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,32,0.8825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,16,0.8801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,64,0.8863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,128,0.8872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,256,0.8895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,512,0.8966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,1024,0.9174
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,2048,0.9435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,4096,1.4743
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,8192,1.1469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,16384,1.3856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,4,1024,1,1,32768,1.8459
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,2,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,4,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,8,0.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,16,0.1768
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,32,0.1756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,128,0.1795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,64,0.1709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,256,0.1703
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,512,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,1024,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,2048,0.1796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,4096,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,8192,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,16384,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,32768,0.2270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,65536,0.2883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1,1,1,131072,0.4077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,2,0.1796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,4,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,8,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,16,0.1810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,32,0.1849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,64,0.1754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,256,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,128,0.1795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,512,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,1024,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,4096,0.1954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,8192,0.2008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,2048,0.1903
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,16384,0.2044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,32768,0.2453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,65536,0.3000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,2,1,1,131072,0.4158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,2,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,4,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,8,0.1944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,16,0.1871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,32,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,128,0.1872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,64,0.1964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,256,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,512,0.1938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,1024,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,4096,0.3267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,2048,0.1957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,8192,0.2135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,16384,0.2170
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,32768,0.2604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,65536,0.3216
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,4,1,1,131072,0.4449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,2,0.2025
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,4,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,8,0.2038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,32,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,16,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,64,0.2068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,128,0.1957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,256,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,512,0.2051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,1024,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,2048,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,4096,0.3278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,8192,0.2251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,16384,0.2436
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,32768,0.2755
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,65536,0.3350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,8,1,1,131072,0.4633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,2,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,4,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,16,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,32,0.2163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,8,0.2046
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,64,0.2164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,128,0.2049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,256,0.2111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,512,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,1024,0.2179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,2048,0.2152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,8192,0.2461
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,4096,0.3504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,16384,0.2510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,32768,0.2844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,65536,0.3637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,16,1,1,131072,0.5015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,2,0.2228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,4,0.2271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,8,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,16,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,32,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,64,0.2179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,128,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,256,0.2243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,512,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,1024,0.2316
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,2048,0.2333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,8192,0.2568
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,4096,0.3568
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,32768,0.3090
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,16384,0.2702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,2,0.2474
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,65536,0.4001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,32,1,1,131072,0.5516
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,4,0.2464
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,8,0.2408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,16,0.2488
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,32,0.2446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,64,0.2441
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,128,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,256,0.2481
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,512,0.2497
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,1024,0.2518
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,2048,0.2511
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,4096,0.3897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,8192,0.2856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,16384,0.3044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,32768,0.3582
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,65536,0.4629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,64,1,1,131072,0.6592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,2,0.2790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,4,0.2836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,8,0.2855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,16,0.2852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,32,0.2815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,64,0.2853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,128,0.2840
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,256,0.2881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,512,0.2871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,1024,0.2956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,2048,0.2984
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,4096,0.4349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,8192,0.3343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,16384,0.3686
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,32768,0.4376
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,65536,0.5820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,128,1,1,131072,0.8591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,2,0.3879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,4,0.3881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,8,0.3868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,16,0.3868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,32,0.3891
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,64,0.3885
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,256,0.3884
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,512,0.3941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,128,0.3895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,1024,0.3988
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,2048,0.4071
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,8192,0.4630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,4096,0.6577
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,16384,0.5203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,32768,0.6398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,65536,0.8780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,256,1,1,131072,1.4453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,2,0.5593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,4,0.5581
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,16,0.5606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,8,0.5600
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,32,0.5624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,64,0.5608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,128,0.5684
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,256,0.5701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,512,0.5734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,2048,0.5956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,1024,0.5783
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,4096,0.9730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,8192,0.6981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,16384,0.8204
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,32768,1.0594
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,512,1,1,65536,1.5291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,2,0.8634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,8,0.8646
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,16,0.8684
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,4,0.8659
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,32,0.8703
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,64,0.8702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,128,0.8747
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,256,0.8780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,512,0.8850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,2048,0.9312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,4096,1.4621
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,16384,1.3682
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,1024,0.8991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,8192,1.1313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,2,0.1753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,2,1024,1,1,32768,1.8259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,4,0.1793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,8,0.1687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,16,0.1736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,32,0.1719
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,64,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,128,0.1669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,512,0.1763
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,256,0.1761
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,1024,0.1775
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,2048,0.1673
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,4096,0.3020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,8192,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,32768,0.2254
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,16384,0.2045
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,65536,0.2955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,2,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1,1,1,131072,0.4085
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,4,0.1785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,8,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,32,0.1794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,64,0.1860
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,16,0.1754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,128,0.1809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,512,0.1802
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,256,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,1024,0.1793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,4096,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,2048,0.1789
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,8192,0.2051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,16384,0.2047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,32768,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,65536,0.3067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,2,1,1,131072,0.4206
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,2,0.1843
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,4,0.1842
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,8,0.1910
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,16,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,32,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,64,0.1954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,128,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,256,0.1922
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,512,0.1845
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,1024,0.1850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,2048,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,8192,0.2137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,4096,0.3184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,16384,0.2278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,32768,0.2572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,65536,0.3195
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,4,1,1,131072,0.4299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,2,0.1944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,8,0.1992
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,4,0.1989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,16,0.2006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,32,0.2033
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,64,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,128,0.2020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,512,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,256,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,1024,0.1991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,2048,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,4096,0.3307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,8192,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,16384,0.2362
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,32768,0.2688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,65536,0.3343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,8,1,1,131072,0.4560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,2,0.2065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,4,0.2131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,8,0.2117
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,16,0.2095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,32,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,64,0.2116
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,128,0.2012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,256,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,512,0.2095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,1024,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,2048,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,4096,0.3429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,8192,0.2395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,16384,0.2475
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,32768,0.2873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,65536,0.3552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,16,1,1,131072,0.4957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,2,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,4,0.2192
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,8,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,16,0.2211
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,32,0.2195
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,64,0.2198
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,128,0.2177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,256,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,512,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,1024,0.2174
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,2048,0.2268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,4096,0.3591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,8192,0.2546
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,16384,0.2709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,32768,0.3041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,65536,0.3919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,32,1,1,131072,0.5518
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,2,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,4,0.2445
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,16,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,8,0.2419
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,32,0.2392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,64,0.2424
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,128,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,256,0.2462
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,512,0.2389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,1024,0.2498
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,2048,0.2509
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,8192,0.2789
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,16384,0.3039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,4096,0.3834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,65536,0.4597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,32768,0.3513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,64,1,1,131072,0.6495
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,2,0.2803
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,4,0.2795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,8,0.2753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,16,0.2796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,64,0.2815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,32,0.2815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,128,0.2818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,256,0.2760
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,512,0.2901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,1024,0.2898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,2048,0.2951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,4096,0.4267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,8192,0.3336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,16384,0.3648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,32768,0.4319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,65536,0.5722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,128,1,1,131072,0.8619
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,4,0.3837
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,2,0.3855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,8,0.3835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,32,0.3850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,64,0.3834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,16,0.3856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,128,0.3845
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,256,0.3879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,512,0.3919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,1024,0.3944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,2048,0.4036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,4096,0.6556
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,8192,0.4596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,16384,0.5169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,32768,0.6365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,65536,0.8725
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,2,0.5557
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,256,1,1,131072,1.4358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,4,0.5541
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,8,0.5557
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,16,0.5544
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,64,0.5582
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,32,0.5529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,128,0.5633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,256,0.5660
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,512,0.5673
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,1024,0.5738
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,2048,0.5895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,4096,0.9674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,8192,0.6910
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,16384,0.8175
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,32768,1.0591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,2,0.8587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,512,1,1,65536,1.5226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,8,0.8607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,16,0.8578
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,4,0.8594
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,32,0.8640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,64,0.8666
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,128,0.8666
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,256,0.8714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,512,0.8808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,2048,0.9188
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,1024,0.8934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,4096,1.4662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,8192,1.1189
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,16384,1.3643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,2,0.2087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,fp8_block,1,1024,1,1,32768,1.8188
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,4,0.2065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,8,0.2042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,16,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,32,0.2026
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,64,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,128,0.2024
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,256,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,512,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,1024,0.2091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,2048,0.2106
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,4096,0.2274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,8192,0.2293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,16384,0.2429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,32768,0.2736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,65536,0.3269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1,1,1,131072,0.4366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,2,0.2161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,4,0.2166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,8,0.2194
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,16,0.2110
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,32,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,64,0.2176
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,128,0.2155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,256,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,512,0.2170
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,1024,0.2209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,2048,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,4096,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,8192,0.2401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,16384,0.2542
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,32768,0.2800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,65536,0.3412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,2,1,1,131072,0.4538
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,2,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,4,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,8,0.2207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,16,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,32,0.2220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,64,0.2217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,128,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,256,0.2223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,512,0.2261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,1024,0.2187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,2048,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,4096,0.3527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,8192,0.2495
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,16384,0.2623
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,32768,0.2863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,65536,0.3493
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,4,1,1,131072,0.4656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,2,0.2268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,4,0.2285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,8,0.2270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,16,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,32,0.2305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,64,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,256,0.2333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,128,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,512,0.2272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,1024,0.2305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,2048,0.2310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,4096,0.3661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,8192,0.2589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,16384,0.2728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,32768,0.2992
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,65536,0.3667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,8,1,1,131072,0.4820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,2,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,8,0.2401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,4,0.2392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,16,0.2419
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,32,0.2404
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,64,0.2407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,128,0.2420
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,256,0.2367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,512,0.2404
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,2048,0.2506
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,1024,0.2419
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,4096,0.3769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,8192,0.2746
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,16384,0.2853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,32768,0.3173
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,65536,0.3848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,16,1,1,131072,0.5234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,2,0.2568
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,4,0.2506
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,8,0.2520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,16,0.2543
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,32,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,64,0.2522
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,128,0.2512
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,256,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,512,0.2581
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,2048,0.2625
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,1024,0.2556
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,4096,0.3962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,8192,0.2898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,32768,0.3365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,65536,0.4228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,16384,0.3038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,2,0.2727
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,32,1,1,131072,0.5831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,4,0.2768
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,8,0.2762
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,16,0.2812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,32,0.2794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,64,0.2836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,128,0.2788
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,256,0.2796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,512,0.2764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,1024,0.2863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,2048,0.2870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,4096,0.4236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,8192,0.3166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,16384,0.3408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,32768,0.3896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,65536,0.4932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,64,1,1,131072,0.6862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,2,0.3474
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,4,0.3588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,8,0.3483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,16,0.3487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,32,0.3610
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,64,0.3478
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,128,0.3485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,256,0.3471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,512,0.3606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,1024,0.3593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,2048,0.3739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,4096,0.5138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,8192,0.4150
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,16384,0.4453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,32768,0.5148
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,65536,0.6565
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,128,1,1,131072,0.9314
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,4,0.4823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,2,0.4818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,8,0.4993
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,16,0.5086
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,32,0.5155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,64,0.4867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,128,0.4894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,256,0.5127
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,512,0.5047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,2048,0.5442
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,1024,0.5200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,4096,0.8031
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,16384,0.6622
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,8192,0.6046
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,32768,0.7820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,65536,1.0153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,2,0.7616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,4,0.7596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,8,0.7603
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,16,0.7611
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,32,0.8339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,256,1,1,131072,1.5698
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,64,0.8357
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,128,0.7669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,256,0.7808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,512,0.7930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,2048,0.8676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,1024,0.8163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,4096,1.3597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,8192,0.9808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,16384,1.1037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,32768,1.3368
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,512,1,1,65536,1.8003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,4,1.3231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,8,1.3282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,16,1.4874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,64,1.4723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,32,1.4630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,2,1.4670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,128,1.4634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,256,1.3699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,512,1.3914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,2048,1.5472
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,4096,2.0751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,1024,1.4382
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,8192,1.7485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,16384,1.9945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,2,0.1863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,128,1024,1,1,32768,2.4556
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,4,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,8,0.1862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,16,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,32,0.1897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,64,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,128,0.1905
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,256,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,512,0.1859
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,1024,0.1920
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,2048,0.1891
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,4096,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,8192,0.2136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,16384,0.2236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,32768,0.2533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,65536,0.3023
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1,1,1,131072,0.4303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,2,0.1948
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,4,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,16,0.1993
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,8,0.1947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,32,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,64,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,128,0.1932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,256,0.1875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,512,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,2048,0.1992
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,8192,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,1024,0.1990
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,4096,0.3214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,16384,0.2340
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,32768,0.2620
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,65536,0.3188
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,2,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,4,0.2024
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,2,1,1,131072,0.4376
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,8,0.2019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,32,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,16,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,64,0.2035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,128,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,256,0.1998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,1024,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,2048,0.2076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,512,0.2032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,4096,0.3341
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,8192,0.2279
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,16384,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,32768,0.2658
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,65536,0.3369
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,4,1,1,131072,0.4537
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,2,0.2081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,8,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,16,0.2028
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,32,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,4,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,64,0.2060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,128,0.2104
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,256,0.2132
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,512,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,2048,0.2127
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,4096,0.3490
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,8192,0.2464
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,16384,0.2543
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,1024,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,32768,0.2868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,65536,0.3527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,8,1,1,131072,0.4766
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,2,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,4,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,8,0.2179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,32,0.2157
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,16,0.2207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,64,0.2198
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,128,0.2227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,256,0.2153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,512,0.2217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,1024,0.2212
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,2048,0.2330
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,4096,0.3612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,8192,0.2577
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,16384,0.2706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,32768,0.3035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,65536,0.3690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,16,1,1,131072,0.5076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,2,0.2268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,4,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,8,0.2293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,16,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,32,0.2325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,64,0.2332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,128,0.2276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,256,0.2317
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,512,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,1024,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,2048,0.2449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,4096,0.3790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,8192,0.2716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,16384,0.2889
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,32768,0.3203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,65536,0.4073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,2,0.2503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,32,1,1,131072,0.5592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,4,0.2530
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,8,0.2535
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,16,0.2530
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,32,0.2497
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,64,0.2516
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,128,0.2530
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,512,0.2549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,1024,0.2570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,2048,0.2653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,4096,0.4001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,8192,0.2971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,256,0.2527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,16384,0.3144
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,32768,0.3667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,65536,0.4683
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,4,0.3146
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,8,0.3108
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,16,0.3095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,2,0.3084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,64,1,1,131072,0.6634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,32,0.3055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,64,0.3092
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,128,0.3096
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,256,0.3135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,2048,0.3344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,1024,0.3232
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,4096,0.4723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,512,0.3237
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,8192,0.3747
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,16384,0.4066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,32768,0.4752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,65536,0.6217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,2,0.4169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,4,0.4290
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,128,1,1,131072,0.9049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,8,0.4430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,16,0.4306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,32,0.4172
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,64,0.4183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,128,0.4208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,256,0.4306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,512,0.4315
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,2048,0.4737
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,1024,0.4534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,4096,0.7280
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,8192,0.5372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,16384,0.5906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,32768,0.7087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,65536,0.9447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,4,0.6707
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,8,0.6723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,2,0.7136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,256,1,1,131072,1.5041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,16,0.6329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,32,0.6387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,64,0.6438
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,128,0.6463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,256,0.6958
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,512,0.7038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,2048,0.7417
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,1024,0.6949
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,4096,1.1300
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,8192,0.8602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,16384,0.9796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,32768,1.2130
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,2,1.1782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,512,1,1,65536,1.6631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,8,1.2621
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,16,1.1043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,4,1.1002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,32,1.1087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,64,1.1154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,128,1.1153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,256,1.1432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,512,1.1856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,2048,1.3133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,1024,1.2043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,4096,1.8629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,16384,1.7661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,8192,1.5281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,2,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,64,1024,1,1,32768,2.2212
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,8,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,16,0.1782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,4,0.1753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,32,0.1702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,64,0.1732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,128,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,512,0.1670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,256,0.1798
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,1024,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,2048,0.1763
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,4096,0.3098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,8192,0.1892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,16384,0.2044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,32768,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,65536,0.2980
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1,1,1,131072,0.4112
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,2,0.1867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,4,0.1840
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,8,0.1825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,16,0.1762
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,32,0.1819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,64,0.1773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,128,0.1847
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,256,0.1855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,512,0.1865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,1024,0.1872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,2048,0.1846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,4096,0.3061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,8192,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,32768,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,16384,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,65536,0.3102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,2,1,1,131072,0.4232
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,2,0.1917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,4,0.1880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,8,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,16,0.1892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,32,0.1919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,64,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,128,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,256,0.1930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,1024,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,2048,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,512,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,4096,0.3232
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,8192,0.2128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,16384,0.2192
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,32768,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,65536,0.3240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,4,1,1,131072,0.4377
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,2,0.2000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,4,0.2002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,8,0.1991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,16,0.1936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,64,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,32,0.1948
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,128,0.2012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,256,0.2006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,512,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,1024,0.2031
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,2048,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,4096,0.3301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,8192,0.2226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,16384,0.2410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,32768,0.2741
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,65536,0.3456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,8,1,1,131072,0.4659
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,2,0.2074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,4,0.2052
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,8,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,16,0.2019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,32,0.2041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,128,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,64,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,256,0.2061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,512,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,1024,0.2084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,2048,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,4096,0.3471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,8192,0.2459
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,16384,0.2616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,32768,0.2915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,65536,0.3646
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,16,1,1,131072,0.5014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,2,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,4,0.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,8,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,16,0.2123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,32,0.2167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,64,0.2166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,128,0.2153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,256,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,512,0.2205
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,1024,0.2219
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,2048,0.2310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,4096,0.3637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,8192,0.2614
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,16384,0.2757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,32768,0.3119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,65536,0.3927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,2,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,4,0.2329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,8,0.2278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,16,0.2319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,32,1,1,131072,0.5534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,32,0.2322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,64,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,128,0.2267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,256,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,512,0.2372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,1024,0.2398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,2048,0.2494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,4096,0.3844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,8192,0.2805
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,32768,0.3454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,16384,0.3002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,65536,0.4553
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,64,1,1,131072,0.6480
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,2,0.2757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,4,0.2808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,8,0.2811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,16,0.2873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,32,0.2772
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,64,0.2835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,128,0.2801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,256,0.2858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,512,0.2869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,1024,0.2976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,2048,0.3101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,4096,0.4499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,8192,0.3470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,16384,0.3818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,32768,0.4485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,65536,0.5943
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,2,0.3704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,4,0.3721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,8,0.3844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,128,1,1,131072,0.8675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,16,0.3693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,32,0.3825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,64,0.3716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,128,0.3733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,256,0.3931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,512,0.3867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,1024,0.3992
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,2048,0.4242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,4096,0.6767
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,16384,0.5396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,8192,0.4864
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,32768,0.6608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,2,0.5305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,16,0.5733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,8,0.5323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,4,0.5739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,65536,0.8964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,256,1,1,131072,1.4535
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,32,0.6042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,64,0.5380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,128,0.5405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,256,0.5515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,512,0.5975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,2048,0.6354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,1024,0.6047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,4096,1.0246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,8192,0.7527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,16384,0.8725
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,32768,1.1089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,2,0.9703
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,4,1.0363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,16,0.9050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,8,0.8963
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,64,0.9195
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,512,1,1,65536,1.5697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,32,0.9138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,128,0.9209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,256,1.0091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,512,0.9780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,2048,1.1116
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,1024,1.0106
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,4096,1.6558
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,16384,1.5688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,8192,1.3289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,2,0.1723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,4,0.1671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,32,1024,1,1,32768,2.0099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,8,0.1652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,16,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,32,0.1708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,64,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,128,0.1718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,256,0.1716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,512,0.1670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,1024,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,2048,0.1669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,4096,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,8192,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,16384,0.2052
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,32768,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,65536,0.2895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1,1,1,131072,0.3944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,2,0.1838
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,4,0.1720
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,8,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,16,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,32,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,64,0.1799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,128,0.1804
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,256,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,512,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,1024,0.1731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,2048,0.1838
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,4096,0.3091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,8192,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,16384,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,32768,0.2374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,65536,0.3063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,2,1,1,131072,0.4058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,2,0.1774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,4,0.1868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,32,0.1870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,64,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,8,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,16,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,256,0.1806
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,128,0.1851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,512,0.1772
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,1024,0.1877
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,2048,0.1916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,16384,0.2108
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,32768,0.2432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,65536,0.3116
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,4096,0.3132
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,8192,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,4,1,1,131072,0.4264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,2,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,4,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,8,0.1885
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,16,0.1951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,32,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,64,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,128,0.1964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,256,0.1890
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,512,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,1024,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,2048,0.1910
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,4096,0.3285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,8192,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,32768,0.2660
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,16384,0.2331
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,65536,0.3329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,8,1,1,131072,0.4635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,2,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,4,0.1963
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,8,0.2015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,16,0.2005
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,32,0.2034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,256,0.2022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,1024,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,64,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,512,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,128,0.1973
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,2048,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,4096,0.3410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,8192,0.2400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,16384,0.2526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,32768,0.2886
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,65536,0.3560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,16,1,1,131072,0.4964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,2,0.2041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,4,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,8,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,16,0.2082
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,32,0.2095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,64,0.2022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,128,0.2063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,256,0.2071
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,512,0.2065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,1024,0.2145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,2048,0.2262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,4096,0.3605
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,8192,0.2543
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,16384,0.2700
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,32768,0.3014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,65536,0.3891
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,32,1,1,131072,0.5404
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,2,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,4,0.2226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,8,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,16,0.2341
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,32,0.2219
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,64,0.2225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,128,0.2198
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,256,0.2171
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,512,0.2257
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,1024,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,2048,0.2444
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,4096,0.3759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,8192,0.2694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,16384,0.2940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,32768,0.3408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,65536,0.4407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,64,1,1,131072,0.6383
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,2,0.2709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,4,0.2657
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,8,0.2816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,16,0.2722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,32,0.2720
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,64,0.2655
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,128,0.2640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,256,0.2734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,1024,0.2899
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,512,0.2773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,2048,0.2968
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,4096,0.4298
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,8192,0.3370
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,16384,0.3673
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,32768,0.4347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,65536,0.5784
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,128,1,1,131072,0.8587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,2,0.3476
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,4,0.3841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,8,0.3743
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,16,0.3617
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,32,0.3487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,64,0.3477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,128,0.3508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,256,0.3763
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,512,0.3643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,1024,0.3776
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,2048,0.3972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,4096,0.6574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,8192,0.4642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,16384,0.5184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,32768,0.6396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,65536,0.8768
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,2,0.5737
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,4,0.5593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,256,1,1,131072,1.4328
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,8,0.5301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,16,0.4884
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,32,0.4878
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,64,0.5531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,128,0.4973
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,256,0.5040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,512,0.5299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,2048,0.5921
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,1024,0.5405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,4096,0.9782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,8192,0.7042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,16384,0.8250
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,32768,1.0618
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,512,1,1,65536,1.5235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,2,0.8723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,4,0.7931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,8,0.8735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,16,0.8029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,32,0.8063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,64,0.8126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,128,0.9379
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,256,0.8308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,512,0.8559
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,2048,1.0166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,4096,1.5528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,1024,0.9228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,8192,1.2308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,16384,1.4596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,4,0.1721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,2,0.1636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,8,0.1709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,16,1024,1,1,32768,1.9171
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,16,0.1710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,32,0.1671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,64,0.1710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,128,0.1651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,256,0.1728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,512,0.1631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,1024,0.1722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,2048,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,8192,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,4096,0.3006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,16384,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,32768,0.2234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,65536,0.2916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1,1,1,131072,0.3896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,2,0.1782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,8,0.1772
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,4,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,16,0.1769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,32,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,64,0.1710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,128,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,256,0.1711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,512,0.1777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,1024,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,2048,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,4096,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,8192,0.1974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,16384,0.2105
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,32768,0.2273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,65536,0.2920
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,2,1,1,131072,0.4069
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,4,0.1810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,8,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,2,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,16,0.1810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,32,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,64,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,128,0.1794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,512,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,256,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,1024,0.1896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,2048,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,4096,0.3118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,8192,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,16384,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,32768,0.2404
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,65536,0.3152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,2,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,4,1,1,131072,0.4253
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,4,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,8,0.1933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,16,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,32,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,64,0.1856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,128,0.1958
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,256,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,512,0.1933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,1024,0.1962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,2048,0.1947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,4096,0.3264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,8192,0.2115
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,16384,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,32768,0.2638
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,65536,0.3353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,8,1,1,131072,0.4586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,2,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,4,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,8,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,16,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,32,0.1957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,64,0.2034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,128,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,256,0.2011
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,512,0.2023
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,1024,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,2048,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,4096,0.3344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,16384,0.2517
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,8192,0.2259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,32768,0.2839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,65536,0.3535
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,2,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,16,1,1,131072,0.4944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,4,0.2084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,8,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,32,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,16,0.2050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,64,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,128,0.2010
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,256,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,512,0.2065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,2048,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,4096,0.3520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,8192,0.2494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,16384,0.2653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,1024,0.2132
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,32768,0.3062
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,65536,0.3856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,32,1,1,131072,0.5404
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,2,0.2189
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,4,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,8,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,16,0.2176
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,32,0.2250
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,64,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,128,0.2087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,256,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,512,0.2194
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,1024,0.2308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,2048,0.2350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,4096,0.3698
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,8192,0.2725
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,16384,0.2908
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,32768,0.3344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,65536,0.4410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,64,1,1,131072,0.6357
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,2,0.2532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,4,0.2577
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,16,0.2627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,8,0.2582
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,32,0.2574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,64,0.2532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,128,0.2580
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,256,0.2665
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,512,0.2709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,2048,0.2895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,1024,0.2780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,8192,0.3322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,4096,0.4284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,16384,0.3562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,32768,0.4290
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,65536,0.5735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,128,1,1,131072,0.8500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,4,0.3332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,8,0.3679
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,16,0.3463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,2,0.3348
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,32,0.3335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,64,0.3357
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,128,0.3409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,256,0.3476
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,512,0.3676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,2048,0.3904
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,1024,0.3716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,4096,0.6414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,8192,0.4514
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,16384,0.5079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,32768,0.6249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,2,0.4652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,4,0.4649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,8,0.4638
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,65536,0.8686
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,32,0.4656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,256,1,1,131072,1.4212
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,16,0.4648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,64,0.4699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,128,0.4754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,256,0.5200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,512,0.4934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,2048,0.5706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,1024,0.5177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,4096,1.0582
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,8192,0.6820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,16384,0.8025
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,32768,1.0375
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,512,1,1,65536,1.5038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,2,0.7439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,4,0.7488
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,8,0.7526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,32,0.7589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,16,0.8297
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,64,0.7655
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,128,0.7678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,256,0.7854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,512,0.8104
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,2048,0.9647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,4096,1.4943
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,1024,0.8541
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,8192,1.1844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,16384,1.4153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,2,0.1609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,4,0.1604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,8,0.1669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,16,0.1667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,32,0.1665
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,64,0.1679
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,128,0.1712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,8,1024,1,1,32768,1.8642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,256,0.1606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,512,0.1611
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,1024,0.1713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,2048,0.1672
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,4096,0.1814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,8192,0.1895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,16384,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,32768,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,65536,0.2795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1,1,1,131072,0.3845
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,2,0.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,8,0.1749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,4,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,16,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,32,0.1795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,128,0.1688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,64,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,256,0.1710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,1024,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,4096,0.3060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,2048,0.1816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,512,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,8192,0.1895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,16384,0.2095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,32768,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,65536,0.2880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,2,1,1,131072,0.4026
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,2,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,4,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,8,0.1851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,16,0.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,64,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,32,0.1865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,128,0.1732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,256,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,512,0.1835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,1024,0.1869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,2048,0.1818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,4096,0.3105
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,8192,0.2014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,32768,0.2420
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,16384,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,65536,0.3091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,2,0.1958
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,4,1,1,131072,0.4265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,4,0.1932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,8,0.1843
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,16,0.1939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,32,0.1896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,64,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,128,0.1957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,512,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,256,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,1024,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,2048,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,4096,0.3241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,8192,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,16384,0.2191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,32768,0.2634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,65536,0.3357
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,8,1,1,131072,0.4597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,2,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,4,0.1916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,8,0.2001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,16,0.1954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,32,0.2020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,64,0.1916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,128,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,256,0.1984
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,512,0.1982
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,1024,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,4096,0.3321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,2048,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,8192,0.2338
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,16384,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,32768,0.2818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,65536,0.3560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,16,1,1,131072,0.4878
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,4,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,2,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,8,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,16,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,32,0.1979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,64,0.2060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,128,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,256,0.2020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,512,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,2048,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,4096,0.3555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,1024,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,8192,0.2464
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,16384,0.2660
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,32768,0.3006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,65536,0.3841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,2,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,4,0.2136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,8,0.2129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,32,1,1,131072,0.5340
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,16,0.2102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,32,0.2169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,64,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,128,0.2136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,256,0.2156
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,512,0.2266
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,1024,0.2285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,2048,0.2327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,8192,0.2670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,4096,0.3656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,16384,0.2889
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,32768,0.3351
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,65536,0.4391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,64,1,1,131072,0.6356
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,4,0.2608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,8,0.2600
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,16,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,2,0.2527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,32,0.2584
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,64,0.2529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,128,0.2582
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,256,0.2710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,2048,0.2869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,1024,0.2715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,512,0.2647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,4096,0.4250
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,8192,0.3274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,16384,0.3574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,32768,0.4267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,65536,0.5690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,2,0.3430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,128,1,1,131072,0.8502
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,8,0.3303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,4,0.3534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,16,0.3292
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,32,0.3298
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,64,0.3417
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,128,0.3321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,256,0.3479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,512,0.3640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,1024,0.3603
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,2048,0.3817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,4096,0.6384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,8192,0.4451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,16384,0.5001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,32768,0.6202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,2,0.4912
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,4,0.4546
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,65536,0.8570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,256,1,1,131072,1.4143
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,8,0.4542
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,16,0.4543
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,32,0.4955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,64,0.4564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,128,0.4641
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,256,0.4724
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,512,0.5005
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,2048,0.5536
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,1024,0.5250
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,4096,1.0483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,8192,0.6677
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,16384,0.7952
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,32768,1.0247
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,512,1,1,65536,1.4867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,2,0.8049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,4,0.7284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,8,0.7266
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,32,0.8065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,16,0.7297
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,64,0.7404
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,128,0.7464
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,256,0.7640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,512,0.8134
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,2048,0.9416
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,1024,0.8308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,4096,1.4814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,8192,1.1530
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,16384,1.3914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,4,1024,1,1,32768,1.8423
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,2,0.1688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,4,0.1693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,8,0.1668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,16,0.1604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,32,0.1617
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,64,0.1649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,128,0.1576
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,256,0.1669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,512,0.1688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,1024,0.1709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,2048,0.1668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,4096,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,16384,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,32768,0.2220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,8192,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,65536,0.2853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1,1,1,131072,0.3930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,2,0.1749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,4,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,8,0.1680
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,16,0.1675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,32,0.1764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,64,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,128,0.1752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,256,0.1749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,512,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,1024,0.1690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,2048,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,4096,0.2958
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,8192,0.1959
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,16384,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,32768,0.2326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,65536,0.2937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,2,1,1,131072,0.4049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,2,0.1754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,4,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,8,0.1757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,16,0.1814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,32,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,64,0.1841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,128,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,256,0.1835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,512,0.1759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,1024,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,2048,0.1778
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,8192,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,4096,0.3146
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,16384,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,32768,0.2395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,65536,0.3034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,4,1,1,131072,0.4174
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,2,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,4,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,8,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,16,0.1941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,64,0.1900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,128,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,32,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,256,0.1855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,512,0.1917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,1024,0.1855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,2048,0.1929
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,4096,0.3226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,8192,0.2107
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,16384,0.2197
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,32768,0.2592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,65536,0.3236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,8,1,1,131072,0.4588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,2,0.1998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,4,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,8,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,16,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,32,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,64,0.1986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,128,0.1898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,256,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,512,0.2005
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,1024,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,2048,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,4096,0.3313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,8192,0.2253
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,16384,0.2456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,32768,0.2738
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,65536,0.3575
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,16,1,1,131072,0.4923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,2,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,4,0.2000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,8,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,16,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,32,0.2029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,64,0.1959
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,128,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,256,0.2081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,512,0.1959
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,1024,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,2048,0.2199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,8192,0.2469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,16384,0.2572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,32768,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,4096,0.3513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,65536,0.3853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,32,1,1,131072,0.5327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,2,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,4,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,8,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,16,0.2147
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,32,0.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,64,0.2136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,128,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,256,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,512,0.2143
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,1024,0.2285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,8192,0.2659
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,2048,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,4096,0.3680
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,16384,0.2883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,32768,0.3326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,65536,0.4354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,64,1,1,131072,0.6318
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,2,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,4,0.2470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,8,0.2553
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,16,0.2503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,32,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,64,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,128,0.2491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,256,0.2717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,512,0.2659
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,1024,0.2704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,2048,0.2846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,4096,0.4246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,8192,0.3276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,16384,0.3570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,32768,0.4201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,65536,0.5705
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,128,1,1,131072,0.8475
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,2,0.3228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,8,0.3275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,16,0.3262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,4,0.3401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,64,0.3244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,32,0.3388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,128,0.3329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,256,0.3388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,512,0.3419
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,1024,0.3571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,2048,0.3805
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,4096,0.6340
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,16384,0.5001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,8192,0.4395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,32768,0.6152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,65536,0.8500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,256,1,1,131072,1.4116
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,2,0.4485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,4,0.4479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,8,0.4501
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,16,0.4923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,32,0.4483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,64,0.4547
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,128,0.5286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,256,0.4645
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,512,0.4789
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,2048,0.5486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,1024,0.4997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,4096,1.0427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,16384,0.7843
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,8192,0.6624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,32768,1.0220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,2,0.7160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,512,1,1,65536,1.4841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,4,0.7167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,8,0.7179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,16,0.7237
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,32,0.7256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,64,0.7304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,128,0.7369
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,256,0.7537
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,2048,0.9299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,512,0.7748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,1024,0.8177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,16384,1.3759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,4096,1.4667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,32768,1.8386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,2,1024,1,1,8192,1.1357
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,2,0.1612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,4,0.1628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,8,0.1566
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,16,0.1644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,32,0.1588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,64,0.1689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,128,0.1628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,256,0.1606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,512,0.1626
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,2048,0.1648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,1024,0.1570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,4096,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,8192,0.1809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,16384,0.1990
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,32768,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,65536,0.2773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1,1,1,131072,0.3868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,2,0.1666
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,4,0.1736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,8,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,16,0.1667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,32,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,64,0.1752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,256,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,512,0.1669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,1024,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,128,0.1708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,2048,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,4096,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,8192,0.1948
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,16384,0.1968
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,32768,0.2295
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,65536,0.2951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,2,1,1,131072,0.4002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,2,0.1749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,4,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,8,0.1731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,16,0.1801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,32,0.1731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,64,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,128,0.1769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,256,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,512,0.1748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,1024,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,2048,0.1749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,4096,0.3041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,8192,0.2000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,16384,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,32768,0.2341
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,65536,0.2999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,4,1,1,131072,0.4161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,2,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,4,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,8,0.1808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,16,0.1892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,32,0.1916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,64,0.1886
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,128,0.1896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,256,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,512,0.1903
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,1024,0.1834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,2048,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,8192,0.2090
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,4096,0.3123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,16384,0.2219
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,32768,0.2536
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,65536,0.3253
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,8,1,1,131072,0.4551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,2,0.1869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,4,0.1957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,8,0.1938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,16,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,32,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,64,0.1941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,256,0.1928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,512,0.1933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,128,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,1024,0.1992
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,2048,0.1960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,4096,0.3265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,8192,0.2272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,16384,0.2335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,32768,0.2794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,65536,0.3513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,16,1,1,131072,0.4867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,2,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,4,0.2005
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,8,0.1939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,16,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,32,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,64,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,128,0.2019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,256,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,512,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,1024,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,2048,0.2106
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,4096,0.3472
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,8192,0.2396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,16384,0.2629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,32768,0.2985
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,65536,0.3790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,32,1,1,131072,0.5308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,2,0.2126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,4,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,8,0.2072
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,16,0.2028
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,32,0.2083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,64,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,128,0.2072
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,256,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,512,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,1024,0.2196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,2048,0.2307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,4096,0.3593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,8192,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,16384,0.2856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,32768,0.3289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,65536,0.4332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,2,0.2426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,64,1,1,131072,0.6281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,4,0.2467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,16,0.2432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,8,0.2503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,32,0.2641
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,64,0.2481
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,128,0.2532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,256,0.2552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,512,0.2626
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,1024,0.2636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,2048,0.2770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,4096,0.4224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,8192,0.3252
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,16384,0.3533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,32768,0.4209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,65536,0.5658
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,2,0.3210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,128,1,1,131072,0.8420
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,4,0.3233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,8,0.3244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,16,0.3576
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,32,0.3228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,64,0.3242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,128,0.3354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,256,0.3325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,512,0.3435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,1024,0.3517
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,2048,0.3771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,4096,0.6322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,8192,0.4383
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,16384,0.4966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,32768,0.6149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,65536,0.8572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,256,1,1,131072,1.4067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,2,0.4451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,4,0.4444
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,8,0.4429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,16,0.4430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,32,0.4438
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,64,0.4491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,128,0.4519
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,256,0.4815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,512,0.4743
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,2048,0.5444
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,4096,1.0383
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,8192,0.6601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,1024,0.4969
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,16384,0.7821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,32768,1.0171
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,2,0.7091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,512,1,1,65536,1.4825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,4,0.7094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,8,0.8717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,16,0.7136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,32,0.7198
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,64,0.7268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,128,0.7297
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,256,0.7461
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,512,0.7953
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,1024,0.8128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,2048,0.9211
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,16384,1.3700
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,4096,1.4198
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,8192,1.1325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,float16,nvfp4,1,1024,1,1,32768,1.8306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,2,0.2174
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,4,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,8,0.2227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,16,0.2170
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,32,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,64,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,128,0.2199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,256,0.2335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,512,0.2143
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,1024,0.2195
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,2048,0.2225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,4096,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,8192,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,16384,0.2501
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,32768,0.2790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,65536,0.3552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1,1,1,131072,0.4434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,2,0.2293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,4,0.2314
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,8,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,16,0.2261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,64,0.2295
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,128,0.2285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,32,0.2255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,256,0.2239
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,512,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,1024,0.2309
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,2048,0.2320
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,4096,0.2469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,16384,0.2646
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,32768,0.2827
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,8192,0.2529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,65536,0.3441
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,2,1,1,131072,0.4644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,2,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,4,0.2389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,8,0.2421
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,16,0.2402
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,32,0.2443
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,64,0.2423
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,128,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,256,0.2426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,512,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,1024,0.2410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,2048,0.2430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,4096,0.2587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,8192,0.2670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,16384,0.2729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,32768,0.3049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,65536,0.3690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,4,1,1,131072,0.4800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,2,0.2505
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,4,0.2548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,8,0.2545
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,16,0.2560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,64,0.2580
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,128,0.2513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,32,0.2560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,256,0.2540
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,512,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,1024,0.2549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,2048,0.2527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,4096,0.3836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,8192,0.2803
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,16384,0.2835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,32768,0.3189
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,65536,0.3860
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,8,1,1,131072,0.5035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,2,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,4,0.2617
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,16,0.2685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,8,0.2657
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,64,0.2630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,32,0.2662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,128,0.2659
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,256,0.2613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,512,0.2663
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,1024,0.2630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,2048,0.2705
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,4096,0.3960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,8192,0.2876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,16384,0.3037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,32768,0.3295
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,131072,0.5476
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,16,1,1,65536,0.4087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,2,0.2859
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,4,0.2926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,8,0.2906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,16,0.2860
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,32,0.2849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,64,0.2902
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,256,0.2874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,128,0.2897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,512,0.2859
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,1024,0.2941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,2048,0.2928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,8192,0.3162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,4096,0.4189
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,16384,0.3359
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,32768,0.3721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,65536,0.4589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,32,1,1,131072,0.6098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,2,0.3433
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,4,0.3414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,8,0.3372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,16,0.3367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,64,0.3406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,128,0.3413
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,32,0.3395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,256,0.3414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,512,0.3437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,1024,0.3408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,4096,0.4754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,2048,0.3404
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,8192,0.3728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,16384,0.3974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,32768,0.4474
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,65536,0.5504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,64,1,1,131072,0.7482
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,2,0.4366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,4,0.4320
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,8,0.4313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,16,0.4353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,32,0.4354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,128,0.4359
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,64,0.4350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,256,0.4358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,512,0.4374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,1024,0.4350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,2048,0.4393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,4096,0.5811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,16384,0.5131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,32768,0.5802
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,65536,0.7304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,2,0.6634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,8192,0.4847
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,128,1,1,131072,1.0030
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,4,0.6664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,8,0.6623
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,16,0.6668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,32,0.6636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,64,0.6675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,128,0.6736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,256,0.6784
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,512,0.6798
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,1024,0.6859
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,2048,0.6900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,4096,0.9451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,8192,0.7506
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,16384,0.8079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,32768,0.9294
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,65536,1.1608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,2,1.1100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,256,1,1,131072,1.7239
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,4,1.1049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,16,1.1092
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,8,1.1104
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,32,1.1089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,64,1.1155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,128,1.1235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,256,1.1257
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,512,1.1312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,1024,1.1319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,2048,1.1508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,4096,1.5332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,8192,1.2531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,16384,1.3804
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,32768,1.6179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,512,1,1,65536,2.0926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,4,1.8907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,2,1.8981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,8,1.8901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,16,1.8952
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,32,1.8966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,64,1.9025
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,128,1.9042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,256,1.9094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,512,1.9127
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,1024,1.9357
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,2048,1.9644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,16384,2.4049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,4096,2.4961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,8192,2.1560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,128,1024,1,1,32768,2.8631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,2,0.1846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,8,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,4,0.1849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,16,0.1896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,32,0.1864
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,64,0.1840
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,128,0.1810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,256,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,512,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,1024,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,2048,0.1827
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,4096,0.2052
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,8192,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,16384,0.2133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,32768,0.2396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,65536,0.3032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1,1,1,131072,0.4125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,2,0.1922
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,4,0.1959
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,8,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,16,0.1923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,64,0.1879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,32,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,128,0.1875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,256,0.1931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,512,0.1892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,1024,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,2048,0.1960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,4096,0.2112
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,8192,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,16384,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,65536,0.3193
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,32768,0.2562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,2,0.2031
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,4,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,2,1,1,131072,0.4271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,8,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,16,0.1965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,32,0.2074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,64,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,128,0.2029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,256,0.2038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,512,0.2053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,1024,0.2047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,2048,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,4096,0.3290
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,8192,0.2312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,16384,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,32768,0.2689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,65536,0.3310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,4,1,1,131072,0.4463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,2,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,4,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,8,0.2124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,16,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,32,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,64,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,128,0.2090
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,256,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,512,0.2108
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,2048,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,1024,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,4096,0.3391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,8192,0.2380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,16384,0.2496
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,32768,0.2790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,65536,0.3459
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,2,0.2237
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,4,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,8,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,8,1,1,131072,0.4616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,16,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,32,0.2214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,64,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,128,0.2248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,256,0.2248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,512,0.2214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,1024,0.2233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,2048,0.2238
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,4096,0.3592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,8192,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,16384,0.2660
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,32768,0.2945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,65536,0.3635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,16,1,1,131072,0.5064
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,2,0.2349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,4,0.2338
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,8,0.2386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,16,0.2351
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,32,0.2414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,64,0.2375
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,128,0.2349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,256,0.2394
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,512,0.2391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,2048,0.2433
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,1024,0.2364
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,4096,0.3702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,8192,0.2710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,16384,0.2816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,32768,0.3166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,65536,0.4058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,32,1,1,131072,0.5601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,2,0.2674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,4,0.2634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,8,0.2649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,16,0.2679
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,32,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,64,0.2626
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,128,0.2656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,256,0.2672
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,512,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,1024,0.2657
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,2048,0.2690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,4096,0.4034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,8192,0.2999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,16384,0.3183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,32768,0.3719
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,65536,0.4755
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,2,0.3158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,64,1,1,131072,0.6759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,4,0.3122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,8,0.3152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,16,0.3145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,32,0.3109
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,64,0.3166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,256,0.3178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,128,0.3148
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,512,0.3183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,2048,0.3184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,1024,0.3190
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,4096,0.4597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,8192,0.3596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,16384,0.3944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,32768,0.4608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,65536,0.6093
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,4,0.4382
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,128,1,1,131072,0.8913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,8,0.4387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,2,0.4373
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,16,0.4373
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,32,0.4397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,64,0.4413
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,128,0.4410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,256,0.4416
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,512,0.4431
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,2048,0.4557
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,4096,0.7048
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,1024,0.4439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,16384,0.5715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,8192,0.5149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,32768,0.6909
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,65536,0.9270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,2,0.6729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,4,0.6691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,8,0.6698
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,256,1,1,131072,1.4903
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,16,0.6732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,32,0.6742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,64,0.6810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,128,0.6867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,256,0.6866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,512,0.6902
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,1024,0.6985
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,2048,0.7115
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,4096,1.0897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,8192,0.8179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,16384,0.9396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,32768,1.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,512,1,1,65536,1.6354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,2,1.1051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,4,1.1099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,8,1.1059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,16,1.1079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,32,1.1152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,64,1.1140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,128,1.1132
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,256,1.1180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,512,1.1282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,1024,1.1442
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,2048,1.1724
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,4096,1.7077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,16384,1.6113
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,8192,1.3714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,2,0.1690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,64,1024,1,1,32768,2.0680
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,4,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,8,0.1689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,16,0.1793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,32,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,64,0.1746
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,128,0.1772
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,256,0.1716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,512,0.1756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,1024,0.1752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,2048,0.1793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,4096,0.1817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,8192,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,16384,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,32768,0.2347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,65536,0.2907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1,1,1,131072,0.4079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,2,0.1856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,4,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,8,0.1777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,16,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,32,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,64,0.1860
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,128,0.1746
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,256,0.1875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,512,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,1024,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,2048,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,4096,0.3164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,8192,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,16384,0.2143
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,32768,0.2330
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,65536,0.3118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,2,1,1,131072,0.4136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,2,0.1930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,4,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,8,0.1953
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,16,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,32,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,64,0.1903
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,128,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,512,0.1947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,1024,0.1856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,256,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,2048,0.1958
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,4096,0.2135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,32768,0.2591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,8192,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,16384,0.2205
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,65536,0.3225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,4,1,1,131072,0.4339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,2,0.1989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,4,0.2011
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,8,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,16,0.2002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,64,0.1959
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,32,0.2043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,128,0.2015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,256,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,512,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,1024,0.1992
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,4096,0.3310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,8192,0.2265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,16384,0.2429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,2048,0.2051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,32768,0.2733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,65536,0.3372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,2,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,8,1,1,131072,0.4554
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,4,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,8,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,32,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,16,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,64,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,128,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,256,0.2128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,512,0.2089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,1024,0.2169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,2048,0.2187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,4096,0.3536
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,16384,0.2529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,8192,0.2447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,32768,0.2906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,65536,0.3644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,16,1,1,131072,0.4946
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,2,0.2272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,4,0.2255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,8,0.2207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,16,0.2279
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,64,0.2246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,32,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,128,0.2268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,512,0.2273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,1024,0.2299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,2048,0.2314
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,256,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,4096,0.3640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,16384,0.2794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,8192,0.2619
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,32768,0.3132
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,65536,0.3949
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,32,1,1,131072,0.5553
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,2,0.2539
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,4,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,8,0.2521
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,16,0.2562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,32,0.2500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,128,0.2531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,64,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,256,0.2527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,512,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,1024,0.2567
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,2048,0.2610
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,4096,0.3950
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,8192,0.2896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,16384,0.3121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,32768,0.3636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,65536,0.4653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,2,0.2957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,64,1,1,131072,0.6632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,4,0.2978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,8,0.3006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,16,0.2959
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,32,0.2980
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,64,0.2975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,128,0.3005
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,512,0.3022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,1024,0.3061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,2048,0.3091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,256,0.3050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,8192,0.3486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,4096,0.4435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,16384,0.3800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,32768,0.4493
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,65536,0.5902
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,128,1,1,131072,0.8714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,2,0.4130
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,8,0.4128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,16,0.4117
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,4,0.4141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,32,0.4127
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,64,0.4151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,128,0.4152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,256,0.4152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,512,0.4197
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,2048,0.4325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,1024,0.4235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,4096,0.6796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,8192,0.4861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,16384,0.5458
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,32768,0.6675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,65536,0.8998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,2,0.6202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,4,0.6183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,256,1,1,131072,1.4637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,16,0.6177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,32,0.6207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,8,0.6172
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,64,0.6269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,128,0.6299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,256,0.6285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,2048,0.6536
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,1024,0.6409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,4096,1.0353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,512,0.6352
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,8192,0.7584
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,16384,0.8861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,32768,1.1184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,512,1,1,65536,1.5884
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,2,1.0097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,4,1.0108
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,8,1.0126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,16,1.0160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,32,1.0175
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,64,1.0287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,128,1.0267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,256,1.0293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,512,1.0322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,2048,1.0828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,4096,1.6002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,1024,1.0495
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,16384,1.5133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,8192,1.2774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,2,0.1671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,4,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,32,1024,1,1,32768,1.9652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,8,0.1714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,16,0.1689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,32,0.1689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,64,0.1710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,128,0.1670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,256,0.1678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,512,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,1024,0.1721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,2048,0.1733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,4096,0.2977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,8192,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,16384,0.1989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,32768,0.2238
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,65536,0.2857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1,1,1,131072,0.4004
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,2,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,4,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,8,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,16,0.1740
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,32,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,64,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,128,0.1745
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,256,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,512,0.1794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,1024,0.1814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,2048,0.1801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,8192,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,16384,0.2076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,32768,0.2309
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,4096,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,65536,0.3020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,2,1,1,131072,0.4117
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,2,0.1936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,4,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,8,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,16,0.1884
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,32,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,64,0.1830
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,128,0.1872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,256,0.1849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,512,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,1024,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,2048,0.1859
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,4096,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,16384,0.2171
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,8192,0.2084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,32768,0.2477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,65536,0.3137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,4,1,1,131072,0.4403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,2,0.1917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,4,0.1948
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,8,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,16,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,32,0.1958
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,64,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,128,0.1959
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,256,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,512,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,1024,0.1957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,2048,0.1998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,4096,0.3227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,8192,0.2217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,16384,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,32768,0.2631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,65536,0.3366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,8,1,1,131072,0.4533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,2,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,4,0.2084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,16,0.2103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,32,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,64,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,128,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,8,0.2013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,256,0.2009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,512,0.2053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,1024,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,2048,0.2137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,8192,0.2374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,4096,0.3429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,32768,0.2793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,16384,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,65536,0.3553
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,16,1,1,131072,0.4944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,2,0.2204
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,4,0.2200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,8,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,16,0.2154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,32,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,64,0.2169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,128,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,256,0.2158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,512,0.2235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,1024,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,2048,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,4096,0.3555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,8192,0.2536
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,16384,0.2685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,32768,0.3070
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,65536,0.3940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,32,1,1,131072,0.5521
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,2,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,4,0.2447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,8,0.2444
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,16,0.2402
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,32,0.2392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,128,0.2438
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,64,0.2465
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,256,0.2463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,512,0.2446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,1024,0.2488
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,2048,0.2496
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,4096,0.3870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,8192,0.2813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,16384,0.3063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,32768,0.3537
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,65536,0.4603
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,64,1,1,131072,0.6573
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,2,0.2836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,4,0.2797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,8,0.2840
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,16,0.2806
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,64,0.2841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,32,0.2829
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,128,0.2855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,256,0.2863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,512,0.2911
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,1024,0.2883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,2048,0.2981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,4096,0.4288
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,16384,0.3671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,8192,0.3367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,32768,0.4347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,65536,0.5835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,128,1,1,131072,0.8594
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,2,0.3878
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,4,0.3916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,8,0.3898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,16,0.3919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,32,0.3918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,64,0.3932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,128,0.3939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,256,0.3947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,512,0.3948
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,1024,0.4024
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,2048,0.4095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,4096,0.6601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,16384,0.5253
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,8192,0.4658
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,32768,0.6467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,65536,0.8783
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,2,0.5732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,256,1,1,131072,1.4494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,4,0.5732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,8,0.5726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,16,0.5742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,32,0.5721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,64,0.5766
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,128,0.5812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,256,0.5836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,512,0.5859
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,1024,0.5939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,2048,0.6082
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,4096,1.0952
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,8192,0.7098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,16384,0.8376
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,32768,1.0729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,2,0.9097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,4,0.9125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,8,0.9168
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,512,1,1,65536,1.5408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,16,0.9150
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,32,0.9148
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,128,0.9240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,64,0.9210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,256,0.9273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,512,0.9327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,2048,0.9808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,1024,0.9541
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,4096,1.5192
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,16384,1.4144
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,8192,1.1718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,2,0.1707
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,16,1024,1,1,32768,1.8753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,4,0.1649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,16,0.1607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,8,0.1689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,32,0.1670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,64,0.1689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,128,0.1666
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,256,0.1709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,512,0.1662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,1024,0.1672
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,2048,0.1613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,4096,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,8192,0.1896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,16384,0.1991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,65536,0.2890
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,32768,0.2209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1,1,1,131072,0.3932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,2,0.1708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,4,0.1727
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,8,0.1770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,16,0.1797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,32,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,64,0.1805
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,128,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,256,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,512,0.1713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,1024,0.1727
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,2048,0.1793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,4096,0.1938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,8192,0.1991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,16384,0.2089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,32768,0.2325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,131072,0.4076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,2,1,1,65536,0.2900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,2,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,4,0.1872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,8,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,32,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,16,0.1891
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,64,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,128,0.1769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,256,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,512,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,1024,0.1810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,2048,0.1895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,4096,0.3164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,8192,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,16384,0.2147
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,65536,0.3122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,32768,0.2360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,4,1,1,131072,0.4258
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,2,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,4,0.1972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,8,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,16,0.1944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,32,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,64,0.1947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,128,0.1860
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,256,0.1858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,512,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,1024,0.1989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,2048,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,4096,0.3268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,8192,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,16384,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,32768,0.2537
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,65536,0.3276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,8,1,1,131072,0.4575
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,2,0.2094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,4,0.2064
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,8,0.2060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,16,0.2053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,32,0.2075
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,64,0.1990
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,128,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,512,0.2081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,1024,0.2055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,256,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,2048,0.2083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,4096,0.3365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,8192,0.2363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,16384,0.2413
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,32768,0.2774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,65536,0.3601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,2,0.2129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,16,1,1,131072,0.4923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,4,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,8,0.2091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,16,0.2128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,32,0.2169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,64,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,128,0.2129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,256,0.2152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,512,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,1024,0.2130
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,2048,0.2270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,4096,0.3550
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,8192,0.2500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,16384,0.2696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,32768,0.3010
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,65536,0.3905
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,32,1,1,131072,0.5458
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,2,0.2318
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,4,0.2422
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,8,0.2370
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,16,0.2384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,32,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,64,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,128,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,256,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,512,0.2421
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,1024,0.2424
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,2048,0.2471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,4096,0.3795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,8192,0.2817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,32768,0.3498
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,16384,0.2978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,65536,0.4548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,64,1,1,131072,0.6508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,2,0.2695
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,4,0.2755
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,8,0.2738
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,16,0.2761
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,32,0.2755
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,128,0.2747
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,64,0.2749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,256,0.2785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,512,0.2792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,1024,0.2879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,2048,0.2917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,4096,0.4288
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,8192,0.3316
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,16384,0.3589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,32768,0.4289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,65536,0.5743
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,128,1,1,131072,0.8536
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,2,0.3803
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,4,0.3780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,8,0.3804
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,16,0.3797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,32,0.3788
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,64,0.3789
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,128,0.3831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,256,0.3817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,512,0.3877
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,1024,0.3898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,2048,0.4001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,4096,0.6511
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,8192,0.4540
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,16384,0.5131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,32768,0.6350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,65536,0.8706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,256,1,1,131072,1.4338
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,2,0.5511
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,4,0.5500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,8,0.5520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,16,0.5513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,64,0.5548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,32,0.5526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,128,0.5565
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,256,0.5621
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,512,0.5633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,1024,0.5722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,2048,0.5868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,4096,0.9669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,16384,0.8151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,8192,0.6921
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,32768,1.0490
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,2,0.8604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,512,1,1,65536,1.5175
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,4,0.8634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,16,0.8661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,8,0.8642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,32,0.8667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,64,0.8744
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,128,0.8718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,256,0.8751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,512,0.8862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,2048,0.9265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,4096,1.4554
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,16384,1.3716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,1024,0.8991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,8192,1.1242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,8,1024,1,1,32768,1.8254
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,2,0.1614
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,4,0.1693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,8,0.1657
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,16,0.1632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,32,0.1607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,64,0.1663
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,128,0.1645
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,256,0.1590
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,512,0.1616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,1024,0.1689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,2048,0.1655
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,4096,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,8192,0.1864
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,16384,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,65536,0.2820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,32768,0.2163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1,1,1,131072,0.3837
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,2,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,4,0.1814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,16,0.1690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,8,0.1770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,32,0.1768
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,64,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,128,0.1708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,256,0.1749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,512,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,1024,0.1797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,2048,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,4096,0.3081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,8192,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,16384,0.1994
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,32768,0.2255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,65536,0.2958
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,2,0.1836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,2,1,1,131072,0.4088
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,4,0.1753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,8,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,16,0.1858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,32,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,64,0.1798
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,128,0.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,256,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,512,0.1877
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,1024,0.1752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,2048,0.1883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,8192,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,4096,0.3093
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,16384,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,32768,0.2386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,65536,0.3112
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,4,1,1,131072,0.4334
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,2,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,4,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,8,0.1920
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,16,0.1921
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,32,0.1850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,64,0.1892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,128,0.1944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,256,0.1936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,512,0.1858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,2048,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,1024,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,4096,0.3214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,8192,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,16384,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,32768,0.2591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,65536,0.3309
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,8,1,1,131072,0.4479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,2,0.2015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,16,0.2048
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,32,0.1953
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,64,0.2002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,4,0.2021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,128,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,8,0.2034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,256,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,512,0.2021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,1024,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,2048,0.1990
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,4096,0.3347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,8192,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,16384,0.2432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,32768,0.2812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,65536,0.3483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,16,1,1,131072,0.4935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,2,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,4,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,8,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,16,0.2133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,32,0.2149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,64,0.2161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,128,0.2072
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,256,0.2149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,512,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,1024,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,4096,0.3507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,8192,0.2501
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,16384,0.2637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,32768,0.3066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,2048,0.2184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,65536,0.3917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,32,1,1,131072,0.5462
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,2,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,8,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,32,0.2378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,16,0.2375
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,64,0.2359
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,128,0.2359
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,4,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,256,0.2384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,512,0.2349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,1024,0.2429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,2048,0.2486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,4096,0.3797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,16384,0.3009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,32768,0.3453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,8192,0.2776
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,65536,0.4547
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,64,1,1,131072,0.6486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,2,0.2703
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,4,0.2734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,8,0.2733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,16,0.2715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,32,0.2674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,64,0.2713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,128,0.2718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,256,0.2715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,512,0.2808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,1024,0.2866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,2048,0.2881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,4096,0.4248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,8192,0.3244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,16384,0.3572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,32768,0.4288
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,65536,0.5692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,2,0.3750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,4,0.3738
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,128,1,1,131072,0.8556
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,8,0.3730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,16,0.3722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,128,0.3738
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,32,0.3731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,64,0.3760
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,256,0.3783
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,512,0.3793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,1024,0.3854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,2048,0.3944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,4096,0.6404
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,8192,0.4491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,16384,0.5066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,32768,0.6278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,65536,0.8634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,2,0.5415
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,256,1,1,131072,1.4294
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,8,0.5396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,4,0.5393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,16,0.5390
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,32,0.5435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,64,0.5446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,128,0.5501
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,256,0.5529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,512,0.5516
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,2048,0.5749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,1024,0.5628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,4096,1.0610
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,16384,0.8038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,8192,0.6815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,32768,1.0459
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,512,1,1,65536,1.5091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,2,0.8381
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,4,0.8386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,8,0.8417
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,16,0.8450
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,32,0.8454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,64,0.8520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,128,0.8504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,256,0.8564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,512,0.8603
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,2048,0.9058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,1024,0.8777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,4096,1.4432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,8192,1.1053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,16384,1.3481
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,2,0.1670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,4,0.1569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,4,1024,1,1,32768,1.7980
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,8,0.1668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,16,0.1586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,32,0.1627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,64,0.1609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,128,0.1595
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,256,0.1569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,512,0.1667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,1024,0.1632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,2048,0.1669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,4096,0.2877
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,8192,0.1815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,16384,0.1891
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,32768,0.2169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,131072,0.3948
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1,1,1,65536,0.2774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,2,0.1774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,4,0.1749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,8,0.1728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,16,0.1749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,32,0.1680
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,64,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,256,0.1756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,512,0.1670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,1024,0.1748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,128,0.1752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,2048,0.1728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,8192,0.1869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,4096,0.3021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,16384,0.2029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,32768,0.2322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,65536,0.2854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,2,1,1,131072,0.4035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,2,0.1839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,4,0.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,8,0.1773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,16,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,32,0.1855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,64,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,128,0.1775
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,256,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,512,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,1024,0.1772
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,2048,0.1794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,4096,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,8192,0.2061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,16384,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,32768,0.2389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,65536,0.3064
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,4,1,1,131072,0.4264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,2,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,4,0.1890
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,8,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,16,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,32,0.1896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,64,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,128,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,256,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,512,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,1024,0.1892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,2048,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,4096,0.3235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,8192,0.2093
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,16384,0.2239
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,32768,0.2548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,131072,0.4528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,4,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,2,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,8,1,1,65536,0.3236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,8,0.2027
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,16,0.2015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,64,0.2020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,128,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,256,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,512,0.1974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,1024,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,32,0.1947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,2048,0.2060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,4096,0.3351
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,8192,0.2311
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,16384,0.2433
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,32768,0.2796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,65536,0.3460
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,16,1,1,131072,0.4888
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,2,0.2041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,8,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,4,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,16,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,32,0.2109
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,64,0.2054
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,128,0.2106
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,256,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,512,0.2061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,1024,0.2131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,2048,0.2194
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,4096,0.3524
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,8192,0.2387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,16384,0.2644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,32768,0.3009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,65536,0.3865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,32,1,1,131072,0.5371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,2,0.2350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,4,0.2349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,8,0.2341
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,16,0.2271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,32,0.2343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,64,0.2325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,128,0.2328
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,512,0.2367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,256,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,1024,0.2418
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,2048,0.2456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,4096,0.3728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,8192,0.2764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,16384,0.2984
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,32768,0.3446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,2,0.2649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,4,0.2710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,65536,0.4536
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,8,0.2659
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,64,1,1,131072,0.6527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,16,0.2688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,32,0.2704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,64,0.2696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,256,0.2734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,1024,0.2819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,128,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,512,0.2745
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,2048,0.2876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,4096,0.4234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,8192,0.3271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,16384,0.3572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,32768,0.4250
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,65536,0.5704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,128,1,1,131072,0.8557
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,2,0.3660
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,4,0.3697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,8,0.3678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,32,0.3687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,16,0.3701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,64,0.3722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,128,0.3745
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,256,0.3717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,512,0.3772
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,1024,0.3787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,2048,0.3895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,4096,0.6394
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,16384,0.5050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,32768,0.6251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,8192,0.4471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,65536,0.8616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,2,0.5336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,4,0.5337
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,8,0.5346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,256,1,1,131072,1.4229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,16,0.5346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,32,0.5369
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,64,0.5395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,128,0.5423
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,256,0.5445
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,512,0.5491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,2048,0.5686
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,1024,0.5544
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,4096,1.0564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,8192,0.6740
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,16384,0.7989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,32768,1.0358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,512,1,1,65536,1.5008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,2,0.8296
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,4,0.8266
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,16,0.8317
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,8,0.8291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,32,0.8339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,64,0.8396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,128,0.8370
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,256,0.8424
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,512,0.8520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,1024,0.8662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,2048,0.8941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,4096,1.4393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,16384,1.3323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,8192,1.0900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,2,1024,1,1,32768,1.7965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,2,0.1647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,4,0.1566
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,8,0.1627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,16,0.1628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,32,0.1649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,64,0.1621
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,256,0.1552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,512,0.1587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,128,0.1604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,1024,0.1587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,2048,0.1627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,4096,0.1772
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,8192,0.1848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,16384,0.1895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,32768,0.2107
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,65536,0.2754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,2,0.1652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1,1,1,131072,0.3859
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,8,0.1710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,4,0.1755
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,16,0.1740
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,32,0.1742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,64,0.1658
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,128,0.1722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,256,0.1658
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,512,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,1024,0.1708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,2048,0.1753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,4096,0.3020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,8192,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,16384,0.1951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,32768,0.2262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,65536,0.2837
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,2,1,1,131072,0.4063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,2,0.1810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,4,0.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,8,0.1742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,16,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,32,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,64,0.1770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,128,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,256,0.1800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,512,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,1024,0.1749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,2048,0.1762
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,4096,0.3083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,8192,0.1961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,16384,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,32768,0.2341
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,65536,0.3034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,4,1,1,131072,0.4261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,2,0.1851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,4,0.1818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,8,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,16,0.1896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,32,0.1875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,64,0.1910
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,128,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,256,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,512,0.1820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,1024,0.1855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,2048,0.1898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,4096,0.3177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,8192,0.2076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,16384,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,32768,0.2488
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,65536,0.3265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,8,1,1,131072,0.4449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,2,0.1962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,4,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,8,0.2015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,16,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,64,0.1926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,32,0.1985
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,128,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,256,0.1908
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,512,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,1024,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,2048,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,4096,0.3323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,8192,0.2187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,16384,0.2404
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,32768,0.2691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,65536,0.3510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,16,1,1,131072,0.4848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,2,0.2108
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,4,0.2071
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,8,0.2063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,16,0.2034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,64,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,32,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,128,0.2085
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,256,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,512,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,1024,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,2048,0.2155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,4096,0.3411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,8192,0.2438
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,16384,0.2563
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,32768,0.3020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,65536,0.3814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,32,1,1,131072,0.5418
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,2,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,4,0.2298
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,8,0.2267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,16,0.2312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,32,0.2261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,64,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,128,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,256,0.2343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,512,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,1024,0.2378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,2048,0.2367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,4096,0.3751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,16384,0.2971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,32768,0.3450
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,65536,0.4481
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,8192,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,64,1,1,131072,0.6467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,2,0.2674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,4,0.2596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,8,0.2650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,16,0.2672
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,64,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,128,0.2624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,32,0.2673
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,256,0.2679
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,512,0.2734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,1024,0.2754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,2048,0.2836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,4096,0.4206
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,8192,0.3238
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,16384,0.3491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,32768,0.4231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,65536,0.5670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,128,1,1,131072,0.8471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,2,0.3645
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,4,0.3680
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,8,0.3663
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,16,0.3649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,32,0.3658
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,64,0.3690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,128,0.3681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,256,0.3725
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,512,0.3720
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,1024,0.3801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,2048,0.3898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,4096,0.6378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,16384,0.5040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,8192,0.4428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,32768,0.6215
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,65536,0.8584
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,256,1,1,131072,1.4203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,2,0.5284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,4,0.5308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,8,0.5297
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,16,0.5304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,32,0.5301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,64,0.5333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,128,0.5408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,256,0.5412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,512,0.5433
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,2048,0.5652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,1024,0.5517
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,4096,0.9421
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,8192,0.6683
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,16384,0.7972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,32768,1.0330
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,512,1,1,65536,1.4992
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,2,0.8207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,4,0.8208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,8,0.8225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,16,0.8270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,32,0.8277
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,64,0.8350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,128,0.8340
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,256,0.8345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,512,0.8435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,2048,0.8901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,1024,0.8596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,4096,1.4247
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,8192,1.0836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,16384,1.3363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,deepseek-ai/DeepSeek-V3.2,DeepseekV32ForCausalLM,float16,fp8,nvfp4,1,1024,1,1,32768,1.7896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,2,0.2999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,4,0.2861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,8,0.2964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,16,0.2937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,32,0.2922
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,64,0.2960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,128,0.2827
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,256,0.2947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,512,0.3009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,1024,0.2857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,2048,0.2938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,4096,0.3902
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,8192,0.3080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,16384,0.3242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,32768,0.3231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,65536,0.3593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1,1,1,131072,0.4079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,2,0.2974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,4,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,8,0.3050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,16,0.3032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,32,0.3072
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,64,0.2969
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,128,0.3029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,256,0.3063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,512,0.2981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,1024,0.3046
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,2048,0.3078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,4096,0.4008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,8192,0.3242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,16384,0.3249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,32768,0.3466
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,65536,0.3732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,2,1,1,131072,0.4059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,4,0.3070
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,2,0.3108
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,8,0.3092
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,16,0.3096
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,32,0.3064
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,64,0.3103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,128,0.3101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,256,0.3018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,512,0.3057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,2048,0.3140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,1024,0.3138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,8192,0.3208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,4096,0.4087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,32768,0.3525
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,16384,0.3400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,65536,0.3725
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,4,1,1,131072,0.4192
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,2,0.3195
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,4,0.3196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,16,0.3142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,64,0.3215
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,8,0.3200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,32,0.3199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,128,0.3164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,256,0.3170
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,512,0.3210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,1024,0.3224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,2048,0.3176
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,4096,0.4255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,8192,0.3369
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,16384,0.3534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,32768,0.3635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,65536,0.3956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,8,1,1,131072,0.4509
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,2,0.3387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,4,0.3322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,16,0.3369
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,8,0.3355
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,32,0.3371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,64,0.3347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,128,0.3350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,256,0.3399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,512,0.3373
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,1024,0.3347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,2048,0.3435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,4096,0.4434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,8192,0.3561
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,16384,0.3716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,32768,0.3861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,65536,0.4302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,16,1,1,131072,0.4964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,2,0.3375
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,8,0.3427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,4,0.3383
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,32,0.3435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,16,0.3429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,64,0.3396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,128,0.3433
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,512,0.3443
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,256,0.3397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,4096,0.4507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,8192,0.3688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,1024,0.3436
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,2048,0.3520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,16384,0.3834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,32768,0.4094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,65536,0.4568
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,32,1,1,131072,0.5424
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,2,0.3673
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,4,0.3668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,8,0.3633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,16,0.3799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,32,0.3654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,64,0.3687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,128,0.3762
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,256,0.3630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,512,0.3739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,1024,0.3722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,2048,0.3732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,4096,0.4837
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,8192,0.3994
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,16384,0.4261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,32768,0.4564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,65536,0.5230
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,64,1,1,131072,0.6533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,4,0.4534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,2,0.4447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,8,0.4632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,16,0.4409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,32,0.4599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,64,0.4604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,128,0.4388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,256,0.4459
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,512,0.4446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,2048,0.4703
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,1024,0.4578
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,4096,0.5787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,8192,0.4981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,16384,0.5377
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,32768,0.5851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,65536,0.6962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,4,0.5933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,8,0.5893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,128,1,1,131072,0.9093
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,2,0.6033
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,16,0.6192
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,32,0.6239
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,64,0.5917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,128,0.6056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,256,0.6052
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,512,0.6076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,1024,0.6246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,2048,0.6510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,4096,0.8380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,8192,0.6963
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,16384,0.7617
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,32768,0.8636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,2,0.9812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,16,0.9915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,4,0.9051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,32,0.9063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,65536,1.0676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,256,1,1,131072,1.4789
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,8,0.9928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,64,0.9113
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,128,0.9137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,256,0.9786
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,512,0.9415
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,1024,0.9638
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,2048,1.0437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,4096,1.3780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,16384,1.2627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,8192,1.1064
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,32768,1.4544
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,2,1.5696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,512,1,1,65536,1.8355
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,4,1.5680
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,8,1.5812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,64,1.5821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,32,1.5805
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,16,1.5721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,128,1.5851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,256,1.6143
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,512,1.6425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,1024,1.6868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,2048,1.8276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,4096,2.4580
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,8192,1.9723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,16384,2.2489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,2,0.2747
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,4,0.2403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,8,0.2411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,16,0.2499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,32,0.2537
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,64,0.2497
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,128,1024,1,1,32768,2.6318
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,128,0.2492
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,256,0.2499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,512,0.2395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,1024,0.2413
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,2048,0.2562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,8192,0.2651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,4096,0.3585
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,16384,0.2838
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,32768,0.2891
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,65536,0.3151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1,1,1,131072,0.3483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,2,0.2549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,4,0.2616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,8,0.2856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,32,0.2916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,16,0.2625
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,64,0.2612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,256,0.2549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,128,0.2637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,512,0.2552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,1024,0.2644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,4096,0.3575
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,16384,0.2900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,8192,0.2784
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,2048,0.2637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,32768,0.2951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,65536,0.3200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,2,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,2,1,1,131072,0.3718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,4,0.2672
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,8,0.2681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,16,0.2637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,32,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,64,0.2626
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,128,0.2606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,256,0.2660
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,512,0.2682
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,1024,0.2726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,2048,0.2730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,8192,0.2790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,4096,0.3609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,16384,0.2936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,32768,0.3029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,65536,0.3349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,4,1,1,131072,0.3836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,2,0.2741
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,4,0.2718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,8,0.2710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,16,0.2716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,32,0.2682
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,64,0.2689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,128,0.2726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,256,0.2741
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,512,0.2724
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,2048,0.2732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,1024,0.2737
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,4096,0.3777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,8192,0.2861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,16384,0.3025
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,32768,0.3199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,65536,0.3533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,2,0.2900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,8,1,1,131072,0.4084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,4,0.2907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,8,0.2877
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,16,0.2861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,64,0.2887
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,32,0.2868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,128,0.2901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,512,0.2866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,256,0.2896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,1024,0.2930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,2048,0.3007
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,4096,0.3931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,8192,0.3096
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,16384,0.3241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,32768,0.3433
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,65536,0.3787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,2,0.2924
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,16,1,1,131072,0.4499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,4,0.2863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,8,0.2913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,32,0.2873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,16,0.2927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,64,0.2930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,256,0.2925
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,128,0.2929
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,512,0.2896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,1024,0.2939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,2048,0.2977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,8192,0.3169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,4096,0.4012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,16384,0.3384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,65536,0.4055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,32768,0.3549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,2,0.3100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,32,1,1,131072,0.4957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,4,0.3068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,8,0.3231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,16,0.3202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,32,0.3104
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,64,0.3054
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,128,0.3115
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,256,0.3104
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,512,0.3124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,1024,0.3157
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,2048,0.3230
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,4096,0.4278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,8192,0.3434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,16384,0.3632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,32768,0.4019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,65536,0.4693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,64,1,1,131072,0.5991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,4,0.3643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,2,0.3592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,8,0.3646
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,16,0.3794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,64,0.3605
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,32,0.3589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,256,0.3672
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,128,0.3605
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,512,0.3769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,1024,0.3764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,2048,0.3895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,4096,0.4953
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,8192,0.4154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,16384,0.4484
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,32768,0.5056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,65536,0.6158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,128,1,1,131072,0.8286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,2,0.4758
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,4,0.4801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,16,0.4766
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,8,0.5057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,32,0.4764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,64,0.4774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,128,0.4799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,256,0.5067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,512,0.5047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,1024,0.5074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,2048,0.5325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,4096,0.7174
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,8192,0.5764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,16384,0.6445
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,32768,0.7436
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,65536,0.9366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,2,0.7059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,256,1,1,131072,1.3618
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,4,0.7065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,8,0.7032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,16,0.7778
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,32,0.7145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,64,0.7188
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,128,0.7919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,256,0.7328
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,512,0.7451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,1024,0.7840
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,2048,0.8278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,4096,1.1905
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,16384,1.0471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,8192,0.9072
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,32768,1.2430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,512,1,1,65536,1.6210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,2,1.2134
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,4,1.2155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,8,1.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,16,1.3004
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,32,1.2215
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,64,1.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,128,1.3573
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,256,1.2523
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,512,1.2743
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,1024,1.3249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,2048,1.4561
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,4096,2.0756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,8192,1.5810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,2,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,16384,1.8726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,4,0.2325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,8,0.2240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,64,1024,1,1,32768,2.2679
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,16,0.2234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,32,0.2329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,64,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,128,0.2377
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,256,0.2326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,512,0.2243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,1024,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,2048,0.2265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,4096,0.3221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,8192,0.2479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,16384,0.2556
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,32768,0.2589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,65536,0.2991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1,1,1,131072,0.3306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,2,0.2370
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,4,0.2325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,8,0.2396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,16,0.2381
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,32,0.2381
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,128,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,64,0.2284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,256,0.2373
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,512,0.2389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,1024,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,2048,0.2431
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,4096,0.3351
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,8192,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,16384,0.2681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,32768,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,65536,0.3049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,2,1,1,131072,0.3453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,2,0.2396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,4,0.2407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,8,0.2393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,16,0.2419
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,64,0.2425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,32,0.2392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,128,0.2437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,256,0.2407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,512,0.2382
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,1024,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,2048,0.2401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,4096,0.3391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,8192,0.2614
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,16384,0.2672
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,32768,0.2905
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,65536,0.3119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,4,1,1,131072,0.3607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,2,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,4,0.2473
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,8,0.2514
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,16,0.2455
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,32,0.2509
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,64,0.2518
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,128,0.2499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,256,0.2512
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,512,0.2470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,1024,0.2497
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,2048,0.2537
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,4096,0.3478
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,8192,0.2668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,16384,0.2862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,32768,0.2960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,131072,0.3843
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,2,0.2621
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,8,1,1,65536,0.3231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,4,0.2658
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,8,0.2644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,16,0.2656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,32,0.2678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,64,0.2647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,128,0.2647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,256,0.2637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,1024,0.2690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,512,0.2636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,2048,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,4096,0.3728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,8192,0.2857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,16384,0.3045
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,65536,0.3540
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,32768,0.3181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,16,1,1,131072,0.4267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,2,0.2600
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,4,0.2606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,8,0.2635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,16,0.2648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,32,0.2624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,64,0.2641
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,128,0.2657
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,256,0.2675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,512,0.2634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,2048,0.2752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,1024,0.2667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,4096,0.3738
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,8192,0.2875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,16384,0.3076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,65536,0.3823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,32768,0.3296
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,32,1,1,131072,0.4628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,2,0.2752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,4,0.2699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,8,0.2758
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,16,0.2736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,32,0.2752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,64,0.2767
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,128,0.2704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,256,0.2753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,512,0.2800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,1024,0.2782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,2048,0.2873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,4096,0.3885
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,8192,0.3044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,16384,0.3333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,32768,0.3621
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,65536,0.4338
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,64,1,1,131072,0.5625
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,2,0.3163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,4,0.3199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,8,0.3206
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,16,0.3184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,32,0.3226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,128,0.3196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,64,0.3168
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,256,0.3234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,512,0.3214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,1024,0.3291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,4096,0.4527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,2048,0.3444
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,16384,0.4089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,8192,0.3704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,32768,0.4556
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,65536,0.5717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,128,1,1,131072,0.7864
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,4,0.4126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,2,0.4119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,8,0.4125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,16,0.4136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,32,0.4107
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,64,0.4101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,128,0.4141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,256,0.4229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,512,0.4236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,2048,0.4589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,1024,0.4347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,4096,0.6476
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,8192,0.5042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,16384,0.5723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,32768,0.6688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,65536,0.8632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,8,0.5721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,2,0.5717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,256,1,1,131072,1.2903
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,4,0.5728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,32,0.5749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,16,0.5716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,64,0.5761
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,128,0.5817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,256,0.5913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,512,0.6014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,2048,0.6753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,4096,1.0331
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,1024,0.6451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,16384,0.9016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,8192,0.7564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,32768,1.1007
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,2,0.9449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,512,1,1,65536,1.4880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,4,0.9482
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,8,0.9531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,16,0.9564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,32,0.9609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,64,0.9701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,128,0.9960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,256,0.9931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,512,1.0152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,2048,1.1740
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,16384,1.5983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,1024,1.0850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,4096,1.7889
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,8192,1.3190
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,32,1024,1,1,32768,1.9883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,2,0.2123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,4,0.2316
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,8,0.2172
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,16,0.2117
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,128,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,32,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,64,0.2128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,256,0.2169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,512,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,1024,0.2361
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,2048,0.2191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,4096,0.3048
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,8192,0.2272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,16384,0.2502
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,32768,0.2503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,65536,0.2793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1,1,1,131072,0.3231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,2,0.2248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,4,0.2236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,8,0.2167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,32,0.2218
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,64,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,16,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,128,0.2249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,256,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,512,0.2267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,1024,0.2235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,2048,0.2212
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,8192,0.2305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,4096,0.3165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,16384,0.2520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,32768,0.2664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,65536,0.2924
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,2,1,1,131072,0.3345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,2,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,4,0.2248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,8,0.2246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,16,0.2269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,32,0.2287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,64,0.2297
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,128,0.2293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,256,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,512,0.2275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,1024,0.2223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,2048,0.2266
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,4096,0.3218
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,8192,0.2466
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,16384,0.2615
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,32768,0.2751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,65536,0.2998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,4,1,1,131072,0.3437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,2,0.2327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,4,0.2311
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,8,0.2367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,16,0.2313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,32,0.2367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,64,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,128,0.2354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,256,0.2305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,512,0.2369
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,1024,0.2340
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,2048,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,4096,0.3430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,16384,0.2715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,8192,0.2554
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,32768,0.2836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,65536,0.3103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,8,1,1,131072,0.3728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,2,0.2485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,4,0.2510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,8,0.2526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,16,0.2518
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,32,0.2469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,64,0.2523
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,128,0.2482
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,256,0.2489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,512,0.2482
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,1024,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,2048,0.2605
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,4096,0.3603
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,8192,0.2687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,16384,0.2898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,32768,0.3025
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,65536,0.3408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,16,1,1,131072,0.4129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,2,0.2435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,4,0.2488
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,8,0.2491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,16,0.2483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,32,0.2467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,64,0.2454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,128,0.2486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,256,0.2485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,512,0.2488
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,1024,0.2486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,2048,0.2603
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,4096,0.3503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,8192,0.2750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,16384,0.2901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,32768,0.3152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,65536,0.3640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,32,1,1,131072,0.4531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,2,0.2549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,4,0.2581
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,8,0.2636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,16,0.2598
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,64,0.2655
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,32,0.2553
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,128,0.2574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,256,0.2598
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,512,0.2597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,1024,0.2609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,2048,0.2697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,4096,0.3741
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,8192,0.2928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,16384,0.3129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,65536,0.4169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,32768,0.3472
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,2,0.2930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,64,1,1,131072,0.5459
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,4,0.2962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,8,0.2982
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,16,0.3140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,32,0.3012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,64,0.2929
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,128,0.2969
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,256,0.2947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,512,0.3036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,1024,0.3074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,2048,0.3212
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,4096,0.4282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,8192,0.3481
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,16384,0.3857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,32768,0.4340
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,65536,0.5457
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,128,1,1,131072,0.7596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,2,0.4073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,4,0.3800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,8,0.4128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,16,0.3777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,32,0.3765
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,64,0.3739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,128,0.3784
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,256,0.3806
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,512,0.3876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,1024,0.4086
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,2048,0.4237
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,4096,0.6131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,8192,0.4676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,16384,0.5343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,32768,0.6354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,65536,0.8287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,2,0.5122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,4,0.6000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,256,1,1,131072,1.2533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,16,0.5113
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,32,0.5104
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,8,0.5138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,64,0.5160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,128,0.5158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,256,0.5268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,512,0.5382
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,1024,0.5608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,2048,0.6103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,4096,0.9785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,8192,0.6894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,16384,0.8354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,32768,1.0359
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,2,0.9787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,4,0.8068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,8,0.8169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,512,1,1,65536,1.4183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,16,0.8185
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,32,0.8246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,64,0.8316
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,128,0.8314
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,256,0.8430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,512,0.8738
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,2048,1.0430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,1024,0.9293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,4096,1.6361
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,8192,1.1763
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,16384,1.4546
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,16,1024,1,1,32768,1.8414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,2,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,4,0.2055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,16,0.2071
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,32,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,8,0.1993
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,64,0.2104
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,128,0.1971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,512,0.1868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,256,0.2002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,1024,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,2048,0.1986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,4096,0.2980
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,8192,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,16384,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,32768,0.2329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,65536,0.2599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1,1,1,131072,0.3174
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,2,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,4,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,8,0.2011
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,16,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,32,0.1962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,64,0.2060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,128,0.1942
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,256,0.2007
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,512,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,1024,0.2107
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,2048,0.2129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,4096,0.3050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,8192,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,16384,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,32768,0.2403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,65536,0.2856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,2,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,2,1,1,131072,0.3201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,4,0.2102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,8,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,16,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,32,0.2063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,64,0.2145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,128,0.2023
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,256,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,512,0.2053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,1024,0.2043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,2048,0.2102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,4096,0.2953
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,8192,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,16384,0.2494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,32768,0.2632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,65536,0.2839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,4,1,1,131072,0.3319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,4,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,2,0.2155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,8,0.2146
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,16,0.2208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,32,0.2200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,64,0.2209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,128,0.2214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,256,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,1024,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,512,0.2251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,2048,0.2312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,4096,0.3335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,8192,0.2443
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,16384,0.2638
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,32768,0.2723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,65536,0.3030
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,2,0.2285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,4,0.2383
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,8,1,1,131072,0.3671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,8,0.2371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,16,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,32,0.2352
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,64,0.2232
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,128,0.2252
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,256,0.2397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,1024,0.2433
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,2048,0.2514
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,512,0.2437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,4096,0.3501
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,8192,0.2598
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,16384,0.2821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,32768,0.2932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,65536,0.3367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,16,1,1,131072,0.4075
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,2,0.2337
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,4,0.2381
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,8,0.2321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,16,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,32,0.2364
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,64,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,128,0.2386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,256,0.2446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,512,0.2384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,1024,0.2460
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,2048,0.2462
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,4096,0.3530
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,8192,0.2662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,16384,0.2810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,32768,0.3086
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,65536,0.3592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,2,0.2498
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,32,1,1,131072,0.4421
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,4,0.2511
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,8,0.2471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,16,0.2409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,32,0.2463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,64,0.2489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,128,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,256,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,512,0.2495
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,1024,0.2517
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,2048,0.2612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,4096,0.3606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,16384,0.3056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,32768,0.3408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,8192,0.2804
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,65536,0.4072
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,64,1,1,131072,0.5383
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,2,0.2917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,4,0.2841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,8,0.2807
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,16,0.2849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,32,0.2830
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,64,0.2901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,128,0.2862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,256,0.2979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,512,0.2866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,1024,0.3042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,2048,0.3030
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,4096,0.4179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,8192,0.3379
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,16384,0.3745
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,32768,0.4277
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,65536,0.5349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,128,1,1,131072,0.7501
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,2,0.3589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,4,0.3551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,8,0.3611
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,16,0.3585
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,32,0.3575
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,64,0.3586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,128,0.3690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,512,0.3873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,1024,0.3797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,256,0.3613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,2048,0.4064
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,4096,0.5926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,8192,0.4490
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,16384,0.5164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,32768,0.6172
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,65536,0.8129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,2,0.4822
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,4,0.4789
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,256,1,1,131072,1.2356
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,8,0.4806
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,16,0.4811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,32,0.4797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,64,0.5226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,128,0.4883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,256,0.4955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,512,0.5056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,1024,0.5287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,2048,0.5794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,4096,0.9294
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,8192,0.6602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,16384,0.8055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,32768,1.0047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,512,1,1,65536,1.3935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,2,0.7468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,4,0.7502
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,8,0.7505
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,16,0.7553
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,32,0.7562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,64,0.7629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,128,0.8189
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,256,0.7847
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,512,0.8528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,2048,0.9605
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,1024,0.8492
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,4096,1.5660
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,2,0.1869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,8192,1.1067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,4,0.1866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,16384,1.3944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,8,0.1824
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,8,1024,1,1,32768,1.7908
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,16,0.1756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,32,0.1900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,128,0.1752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,256,0.1883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,64,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,512,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,1024,0.1930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,2048,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,4096,0.2743
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,8192,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,16384,0.2053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,32768,0.2278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,65536,0.2484
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1,1,1,131072,0.2908
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,2,0.2085
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,8,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,16,0.1931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,4,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,64,0.1982
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,32,0.1849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,128,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,256,0.1916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,512,0.1939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,1024,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,4096,0.2785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,2048,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,8192,0.2038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,16384,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,32768,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,65536,0.2623
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,2,1,1,131072,0.3368
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,2,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,4,0.1896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,8,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,16,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,32,0.2052
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,64,0.2006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,128,0.1960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,256,0.1998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,512,0.1936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,1024,0.1991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,2048,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,4096,0.3004
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,8192,0.2167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,16384,0.2439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,32768,0.2473
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,131072,0.3254
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,4,1,1,65536,0.2914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,2,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,8,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,4,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,16,0.2095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,32,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,64,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,128,0.2063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,256,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,512,0.2076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,1024,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,2048,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,4096,0.3111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,8192,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,16384,0.2545
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,32768,0.2697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,65536,0.3060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,8,1,1,131072,0.3625
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,2,0.2200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,4,0.2198
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,8,0.2228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,16,0.2227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,32,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,64,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,128,0.2270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,256,0.2251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,512,0.2207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,1024,0.2220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,2048,0.2496
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,4096,0.3435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,8192,0.2607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,16384,0.2738
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,32768,0.2982
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,65536,0.3361
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,16,1,1,131072,0.4001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,2,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,4,0.2220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,8,0.2240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,16,0.2185
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,32,0.2253
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,64,0.2306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,128,0.2213
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,256,0.2281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,1024,0.2461
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,512,0.2196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,2048,0.2493
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,4096,0.3452
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,8192,0.2629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,16384,0.2871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,32768,0.3059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,65536,0.3521
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,4,0.2516
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,2,0.2333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,32,1,1,131072,0.4428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,8,0.2363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,16,0.2403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,32,0.2430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,64,0.2438
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,128,0.2416
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,256,0.2334
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,512,0.2430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,2048,0.2588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,1024,0.2490
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,4096,0.3642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,8192,0.2780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,32768,0.3346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,16384,0.3022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,65536,0.4047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,64,1,1,131072,0.5315
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,2,0.2795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,8,0.2740
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,4,0.2740
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,16,0.2778
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,32,0.2711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,64,0.2845
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,128,0.2808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,256,0.2829
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,512,0.2829
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,1024,0.2928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,2048,0.3003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,4096,0.4115
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,16384,0.3684
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,8192,0.3332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,32768,0.4223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,65536,0.5300
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,2,0.3515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,128,1,1,131072,0.7461
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,4,0.3704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,8,0.3491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,16,0.3509
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,32,0.3624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,64,0.3494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,128,0.3525
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,256,0.3585
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,512,0.3598
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,1024,0.3770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,2048,0.3970
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,4096,0.5847
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,16384,0.5096
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,8192,0.4415
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,32768,0.6076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,2,0.5025
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,65536,0.8035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,256,1,1,131072,1.2259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,4,0.4622
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,8,0.5094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,16,0.4640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,64,0.4681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,32,0.4648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,128,0.4711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,256,0.4794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,512,0.4912
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,1024,0.5118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,2048,0.5633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,4096,0.9127
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,8192,0.6404
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,16384,0.7892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,32768,0.9839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,512,1,1,65536,1.3724
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,2,0.7194
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,4,0.7198
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,8,0.7221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,16,0.7245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,32,0.7277
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,64,0.7385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,128,0.7324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,256,0.7534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,512,0.7759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,2048,0.9272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,1024,0.8195
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,4096,1.5319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,16384,1.3697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,2,0.1837
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,4,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,8192,1.0749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,4,1024,1,1,32768,1.7477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,8,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,16,0.1830
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,32,0.1939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,64,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,128,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,256,0.1708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,512,0.1781
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,1024,0.1895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,2048,0.1754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,4096,0.2753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,8192,0.1968
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,16384,0.2085
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,32768,0.2243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,65536,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1,1,1,131072,0.2903
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,2,0.1870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,4,0.1817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,8,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,16,0.1946
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,32,0.1879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,64,0.1858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,128,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,256,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,1024,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,512,0.1871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,2048,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,4096,0.2967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,8192,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,16384,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,65536,0.2583
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,131072,0.3012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,2,1,1,32768,0.2284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,2,0.1860
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,4,0.1924
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,8,0.1960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,16,0.1938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,32,0.1916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,64,0.1871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,128,0.1872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,512,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,256,0.1875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,1024,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,2048,0.2020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,4096,0.2999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,8192,0.2128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,16384,0.2199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,32768,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,131072,0.3292
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,2,0.2035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,4,1,1,65536,0.2679
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,4,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,8,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,16,0.2061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,32,0.1985
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,64,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,128,0.2065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,256,0.2081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,512,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,1024,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,2048,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,8192,0.2312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,16384,0.2472
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,32768,0.2678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,65536,0.3050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,131072,0.3572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,8,1,1,4096,0.3133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,2,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,4,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,8,0.2208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,32,0.2209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,64,0.2170
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,128,0.2199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,256,0.2179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,16,0.2166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,512,0.2199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,1024,0.2214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,2048,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,4096,0.3446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,8192,0.2538
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,32768,0.2933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,65536,0.3284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,131072,0.3951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,16,1,1,16384,0.2775
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,2,0.2184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,4,0.2205
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,8,0.2207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,16,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,32,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,64,0.2178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,128,0.2173
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,256,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,512,0.2211
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,1024,0.2375
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,2048,0.2418
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,16384,0.2808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,8192,0.2585
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,4096,0.3456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,32768,0.2990
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,65536,0.3531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,32,1,1,131072,0.4397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,2,0.2272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,4,0.2278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,8,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,16,0.2294
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,32,0.2352
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,64,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,128,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,256,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,512,0.2466
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,1024,0.2493
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,2048,0.2576
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,4096,0.3597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,8192,0.2759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,16384,0.2970
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,32768,0.3348
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,65536,0.4028
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,64,1,1,131072,0.5341
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,2,0.2782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,4,0.2831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,8,0.2725
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,16,0.2751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,32,0.2713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,64,0.2694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,128,0.2769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,256,0.2795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,512,0.2902
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,1024,0.2890
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,2048,0.3012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,4096,0.4083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,8192,0.3259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,16384,0.3670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,32768,0.4195
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,65536,0.5292
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,128,1,1,131072,0.7388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,2,0.3758
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,4,0.3459
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,8,0.3462
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,16,0.3445
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,32,0.3425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,64,0.3430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,128,0.3486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,256,0.3471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,1024,0.3687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,2048,0.3929
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,512,0.3585
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,4096,0.5825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,8192,0.4329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,16384,0.5056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,32768,0.6049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,65536,0.7991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,2,0.4562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,256,1,1,131072,1.2187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,4,0.4572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,8,0.4569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,32,0.4567
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,16,0.4549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,64,0.4580
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,128,0.4629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,256,0.4715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,512,0.4824
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,2048,0.5520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,4096,0.9077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,1024,0.5038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,8192,0.6330
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,16384,0.7797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,32768,0.9817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,512,1,1,65536,1.3592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,4,0.7081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,2,0.7047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,8,0.7066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,16,0.7086
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,32,0.7078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,64,0.7188
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,128,0.7231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,256,0.7335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,512,0.7631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,1024,0.8040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,2048,0.9249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,4096,1.5210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,2,0.1669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,16384,1.3446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,4,0.1807
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,8192,1.0558
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,8,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,2,1024,1,1,32768,1.7300
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,16,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,32,0.1810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,64,0.1658
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,256,0.1668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,128,0.1795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,1024,0.1825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,2048,0.1782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,512,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,4096,0.2769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,8192,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,16384,0.2060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,32768,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,65536,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1,1,1,131072,0.2853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,2,0.1849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,8,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,16,0.1835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,4,0.1850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,32,0.1917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,64,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,128,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,256,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,512,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,1024,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,2048,0.1877
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,4096,0.2876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,8192,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,16384,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,65536,0.2518
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,32768,0.2196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,2,1,1,131072,0.3072
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,2,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,4,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,8,0.1839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,16,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,32,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,64,0.1856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,128,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,256,0.1863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,512,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,1024,0.1916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,4096,0.2901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,2048,0.1933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,8192,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,16384,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,32768,0.2415
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,65536,0.2733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,4,1,1,131072,0.3225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,2,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,4,0.2038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,8,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,16,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,32,0.2013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,64,0.2033
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,128,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,256,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,512,0.2031
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,1024,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,2048,0.2044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,4096,0.3041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,8192,0.2179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,16384,0.2455
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,32768,0.2608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,65536,0.2932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,8,1,1,131072,0.3565
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,2,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,4,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,8,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,32,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,16,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,64,0.2158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,128,0.2103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,512,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,256,0.2197
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,1024,0.2210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,2048,0.2371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,4096,0.3352
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,8192,0.2434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,16384,0.2733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,32768,0.2865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,65536,0.3307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,2,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,16,1,1,131072,0.4002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,4,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,8,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,16,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,32,0.2114
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,64,0.2095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,128,0.2200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,256,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,512,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,1024,0.2336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,2048,0.2423
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,4096,0.3395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,8192,0.2560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,16384,0.2736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,32768,0.3023
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,65536,0.3488
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,32,1,1,131072,0.4394
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,2,0.2147
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,16,0.2232
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,32,0.2367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,4,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,64,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,8,0.2204
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,128,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,256,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,512,0.2432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,1024,0.2505
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,2048,0.2563
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,4096,0.3578
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,8192,0.2697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,16384,0.3002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,65536,0.4014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,131072,0.5253
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,64,1,1,32768,0.3342
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,2,0.2735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,4,0.2588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,8,0.2584
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,16,0.2735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,32,0.2592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,64,0.2743
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,128,0.2662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,256,0.2883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,512,0.2839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,1024,0.2946
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,2048,0.3002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,4096,0.4005
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,8192,0.3280
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,16384,0.3676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,32768,0.4191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,65536,0.5237
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,2,0.3417
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,128,1,1,131072,0.7465
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,4,0.3429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,8,0.3472
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,16,0.3473
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,32,0.3602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,64,0.3426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,128,0.3631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,256,0.3652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,512,0.3697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,1024,0.3667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,2048,0.3876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,4096,0.5838
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,16384,0.5048
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,8192,0.4371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,32768,0.5981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,65536,0.7989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,2,0.4504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,4,0.5236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,256,1,1,131072,1.2254
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,16,0.4522
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,8,0.4518
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,32,0.4852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,64,0.4528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,128,0.4599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,256,0.4659
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,512,0.5052
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,2048,0.5519
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,1024,0.5018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,4096,0.9047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,8192,0.6296
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,16384,0.7694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,32768,0.9713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,2,0.8429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,4,0.6962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,512,1,1,65536,1.3556
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,16,0.7864
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,8,0.6968
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,32,0.8619
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,64,0.7103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,128,0.7137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,256,0.8021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,512,0.7552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,1024,0.7949
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,2048,0.9149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,4096,1.5099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,8192,1.0535
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,16384,1.3293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,float16,1,1024,1,1,32768,1.7195
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,2,0.2685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,4,0.2786
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,8,0.2790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,16,0.2750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,32,0.2752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,64,0.2754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,128,0.2737
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,256,0.2773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,512,0.2687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,1024,0.2748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,2048,0.2744
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,4096,0.3748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,8192,0.2899
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,16384,0.3028
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,32768,0.3075
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,65536,0.3737
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1,1,1,131072,0.3709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,2,0.2888
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,4,0.2879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,8,0.2860
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,16,0.2889
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,32,0.2889
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,64,0.2823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,128,0.2852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,256,0.2798
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,1024,0.2883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,512,0.2870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,2048,0.2871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,4096,0.3826
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,8192,0.3008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,16384,0.3065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,32768,0.3235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,65536,0.3434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,2,1,1,131072,0.3957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,2,0.3004
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,4,0.2965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,8,0.3011
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,16,0.2979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,64,0.3010
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,32,0.2960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,128,0.2953
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,256,0.3033
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,512,0.2991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,1024,0.3008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,2048,0.3017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,4096,0.3960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,8192,0.3145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,32768,0.3339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,65536,0.3697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,16384,0.3219
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,4,1,1,131072,0.4125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,2,0.3107
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,4,0.3145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,8,0.3132
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,16,0.3134
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,64,0.3075
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,32,0.3157
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,128,0.3090
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,256,0.3130
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,512,0.3115
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,2048,0.3146
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,16384,0.3349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,4096,0.4132
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,32768,0.3494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,8192,0.3265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,1024,0.3165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,131072,0.4393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,8,1,1,65536,0.3852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,2,0.3299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,4,0.3326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,8,0.3306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,32,0.3348
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,16,0.3353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,64,0.3329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,128,0.3279
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,256,0.3320
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,512,0.3284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,2048,0.3338
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,1024,0.3375
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,4096,0.4304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,8192,0.3468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,16384,0.3634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,32768,0.3759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,65536,0.4165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,16,1,1,131072,0.4816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,2,0.3491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,4,0.3513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,8,0.3447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,32,0.3482
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,16,0.3469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,64,0.3474
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,128,0.3504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,256,0.3452
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,512,0.3503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,1024,0.3510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,2048,0.3468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,4096,0.4500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,8192,0.3671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,16384,0.3848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,32768,0.4087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,65536,0.4533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,32,1,1,131072,0.5451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,2,0.4043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,4,0.4016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,8,0.4013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,16,0.4017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,32,0.3987
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,64,0.4011
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,128,0.4018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,256,0.3978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,512,0.4015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,2048,0.4050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,4096,0.5062
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,1024,0.4041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,16384,0.4496
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,8192,0.4216
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,32768,0.4839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,65536,0.5478
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,64,1,1,131072,0.6814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,4,0.5016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,8,0.5039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,2,0.5013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,16,0.5009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,32,0.5042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,64,0.5009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,128,0.4974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,256,0.5041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,512,0.5000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,1024,0.5032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,2048,0.5111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,4096,0.6100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,16384,0.5695
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,8192,0.5387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,32768,0.6223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,65536,0.7382
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,128,1,1,131072,0.9533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,2,0.7429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,4,0.7405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,8,0.7420
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,32,0.7403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,16,0.7396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,64,0.7439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,128,0.7499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,256,0.7496
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,1024,0.7571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,512,0.7521
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,2048,0.7693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,4096,0.9488
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,16384,0.8728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,32768,0.9767
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,8192,0.8135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,65536,1.1672
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,2,1.2274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,4,1.2238
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,8,1.2217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,256,1,1,131072,1.5929
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,16,1.2260
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,32,1.2301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,64,1.2325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,128,1.2354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,256,1.2415
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,512,1.2443
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,2048,1.2641
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,1024,1.2503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,4096,1.6178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,8192,1.3606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,16384,1.4879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,32768,1.6826
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,512,1,1,65536,2.0760
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,2,2.1066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,4,2.1053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,8,2.1056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,32,2.1191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,16,2.1111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,64,2.1159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,128,2.1114
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,256,2.1203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,512,2.1295
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,2048,2.1745
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,1024,2.1408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,8192,2.3553
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,4096,2.7690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,16384,2.5902
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,128,1024,1,1,32768,2.9822
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,2,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,4,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,8,0.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,16,0.2177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,32,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,64,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,128,0.2209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,256,0.2131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,512,0.2155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,1024,0.2174
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,2048,0.2112
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,4096,0.3160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,8192,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,16384,0.2362
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,32768,0.2522
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,65536,0.2742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,2,0.2270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1,1,1,131072,0.3175
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,4,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,8,0.2311
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,16,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,32,0.2243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,64,0.2273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,128,0.2271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,256,0.2297
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,512,0.2291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,1024,0.2217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,2048,0.2305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,4096,0.3246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,8192,0.2338
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,16384,0.2532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,32768,0.2623
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,65536,0.2903
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,2,1,1,131072,0.3340
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,2,0.2316
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,4,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,8,0.2352
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,16,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,32,0.2329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,64,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,128,0.2342
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,1024,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,512,0.2359
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,256,0.2364
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,2048,0.2361
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,4096,0.3234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,8192,0.2471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,16384,0.2591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,32768,0.2748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,65536,0.2976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,4,1,1,131072,0.3413
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,2,0.2424
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,4,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,8,0.2380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,16,0.2410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,32,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,64,0.2433
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,128,0.2410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,256,0.2390
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,512,0.2398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,1024,0.2382
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,2048,0.2440
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,4096,0.3416
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,8192,0.2516
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,16384,0.2702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,32768,0.2813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,65536,0.3075
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,2,0.2618
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,8,1,1,131072,0.3643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,4,0.2597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,8,0.2604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,16,0.2572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,32,0.2618
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,64,0.2609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,256,0.2570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,128,0.2566
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,512,0.2605
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,1024,0.2609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,2048,0.2607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,4096,0.3568
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,8192,0.2781
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,16384,0.2876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,32768,0.3083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,65536,0.3431
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,16,1,1,131072,0.4145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,2,0.2676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,8,0.2632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,16,0.2674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,4,0.2667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,32,0.2630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,64,0.2663
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,256,0.2662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,128,0.2613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,512,0.2674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,1024,0.2642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,2048,0.2705
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,4096,0.3699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,8192,0.2796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,16384,0.3020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,32768,0.3234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,65536,0.3687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,32,1,1,131072,0.4632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,2,0.2938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,4,0.2937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,8,0.2927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,16,0.2895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,32,0.2954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,64,0.2945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,128,0.2901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,256,0.2960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,512,0.2942
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,1024,0.2977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,2048,0.2977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,4096,0.3937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,8192,0.3194
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,16384,0.3425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,32768,0.3697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,131072,0.5722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,64,1,1,65536,0.4436
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,2,0.3361
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,4,0.3383
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,8,0.3342
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,16,0.3372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,32,0.3399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,64,0.3343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,128,0.3387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,256,0.3390
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,512,0.3396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,1024,0.3395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,4096,0.4520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,2048,0.3405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,8192,0.3739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,16384,0.4090
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,32768,0.4572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,65536,0.5726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,128,1,1,131072,0.7849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,2,0.4706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,4,0.4698
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,8,0.4680
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,16,0.4689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,32,0.4708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,64,0.4701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,128,0.4712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,256,0.4712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,512,0.4715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,1024,0.4749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,2048,0.4818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,4096,0.6676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,16384,0.5895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,8192,0.5288
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,32768,0.6905
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,65536,0.8842
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,256,1,1,131072,1.3115
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,2,0.7174
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,4,0.7161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,8,0.7184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,32,0.7228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,16,0.7188
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,64,0.7264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,128,0.7310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,256,0.7313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,512,0.7355
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,2048,0.7563
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,1024,0.7396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,4096,1.1000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,8192,0.8454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,16384,0.9746
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,32768,1.1661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,2,1.1966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,4,1.1964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,512,1,1,65536,1.5489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,8,1.1962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,16,1.1964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,32,1.1971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,64,1.2046
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,128,1.2011
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,256,1.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,2048,1.2552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,512,1.2106
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,1024,1.2291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,4096,1.8478
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,8192,1.4196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,16384,1.6849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,64,1024,1,1,32768,2.0629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,4,0.1950
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,2,0.1907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,8,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,16,0.1985
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,32,0.1964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,64,0.1905
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,128,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,256,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,512,0.1909
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,1024,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,2048,0.2035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,8192,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,4096,0.2883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,16384,0.2144
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,32768,0.2321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,65536,0.2539
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1,1,1,131072,0.2937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,2,0.2060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,4,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,8,0.2111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,32,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,16,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,64,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,128,0.2063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,256,0.2032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,512,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,1024,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,2048,0.2103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,4096,0.2931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,8192,0.2217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,16384,0.2358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,32768,0.2416
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,65536,0.2650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,2,1,1,131072,0.3165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,2,0.2143
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,4,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,8,0.2117
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,16,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,32,0.2149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,64,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,128,0.2177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,256,0.2204
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,512,0.2135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,1024,0.2149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,2048,0.2126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,4096,0.3037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,8192,0.2291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,16384,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,32768,0.2539
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,65536,0.2858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,4,1,1,131072,0.3256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,2,0.2217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,4,0.2186
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,8,0.2173
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,16,0.2198
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,32,0.2234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,64,0.2240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,128,0.2238
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,256,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,512,0.2219
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,1024,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,2048,0.2186
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,8192,0.2343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,16384,0.2505
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,4096,0.3179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,32768,0.2624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,65536,0.2934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,8,1,1,131072,0.3510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,2,0.2360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,4,0.2366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,8,0.2396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,16,0.2446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,32,0.2398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,64,0.2392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,128,0.2408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,512,0.2392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,256,0.2386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,1024,0.2363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,2048,0.2434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,4096,0.3419
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,8192,0.2539
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,16384,0.2709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,32768,0.2885
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,65536,0.3230
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,16,1,1,131072,0.3886
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,2,0.2432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,4,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,8,0.2438
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,16,0.2410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,32,0.2432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,64,0.2401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,128,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,256,0.2395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,512,0.2435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,1024,0.2482
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,2048,0.2467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,4096,0.3455
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,8192,0.2617
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,16384,0.2747
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,32768,0.2965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,65536,0.3501
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,32,1,1,131072,0.4406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,2,0.2652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,4,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,8,0.2649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,16,0.2667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,32,0.2609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,64,0.2633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,128,0.2655
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,512,0.2672
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,1024,0.2694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,256,0.2662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,2048,0.2693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,4096,0.3690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,8192,0.2863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,16384,0.3094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,32768,0.3465
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,65536,0.4138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,64,1,1,131072,0.5412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,2,0.3082
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,4,0.3087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,8,0.3097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,16,0.3038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,32,0.3064
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,64,0.3096
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,128,0.3094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,256,0.3096
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,512,0.3098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,2048,0.3149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,1024,0.3112
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,4096,0.4223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,8192,0.3392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,16384,0.3709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,32768,0.4305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,65536,0.5394
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,2,0.4254
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,4,0.4247
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,128,1,1,131072,0.7575
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,8,0.4234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,16,0.4233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,32,0.4222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,64,0.4243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,128,0.4252
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,256,0.4261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,512,0.4257
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,1024,0.4299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,2048,0.4379
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,4096,0.6173
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,16384,0.5448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,8192,0.4794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,32768,0.6434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,65536,0.8386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,256,1,1,131072,1.2632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,2,0.6278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,4,0.6295
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,8,0.6284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,32,0.6313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,16,0.6293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,64,0.6376
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,128,0.6401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,256,0.6398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,512,0.6407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,2048,0.6616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,8192,0.7498
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,4096,1.0121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,1024,0.6493
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,16384,0.8810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,32768,1.0741
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,512,1,1,65536,1.4681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,2,1.0393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,4,1.0366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,8,1.0374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,16,1.0397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,64,1.0483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,32,1.0452
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,128,1.0473
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,256,1.0507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,512,1.0575
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,2048,1.0965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,1024,1.0715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,4096,1.6925
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,8192,1.2675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,16384,1.5226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,2,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,4,0.1800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,8,0.1834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,32,1024,1,1,32768,1.8967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,16,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,32,0.1881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,64,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,128,0.1882
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,256,0.1824
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,512,0.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,1024,0.1827
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,2048,0.1823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,4096,0.2868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,8192,0.2011
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,16384,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,32768,0.2251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,65536,0.2425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1,1,1,131072,0.2840
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,2,0.1891
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,4,0.1951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,8,0.1949
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,16,0.1968
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,32,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,64,0.1972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,128,0.1899
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,256,0.1936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,512,0.1938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,1024,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,2048,0.1970
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,4096,0.2947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,8192,0.2086
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,16384,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,32768,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,65536,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,2,1,1,131072,0.2996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,2,0.1960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,4,0.2014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,8,0.2049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,16,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,32,0.2034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,64,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,128,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,256,0.1986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,512,0.1974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,1024,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,2048,0.2074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,4096,0.2982
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,8192,0.2158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,16384,0.2220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,32768,0.2338
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,65536,0.2631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,4,1,1,131072,0.3068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,4,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,8,0.2081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,2,0.2109
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,16,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,32,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,64,0.2061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,128,0.2084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,256,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,512,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,1024,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,2048,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,4096,0.3082
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,8192,0.2147
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,16384,0.2327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,32768,0.2463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,65536,0.2791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,8,1,1,131072,0.3384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,2,0.2267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,4,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,8,0.2276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,16,0.2208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,32,0.2229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,64,0.2262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,128,0.2231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,512,0.2251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,1024,0.2262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,256,0.2297
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,2048,0.2237
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,4096,0.3225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,8192,0.2369
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,16384,0.2587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,32768,0.2730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,65536,0.3140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,16,1,1,131072,0.3815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,2,0.2278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,4,0.2248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,16,0.2257
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,8,0.2236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,32,0.2269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,64,0.2277
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,128,0.2290
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,256,0.2270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,512,0.2287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,1024,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,2048,0.2284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,4096,0.3254
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,8192,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,16384,0.2672
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,32768,0.2862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,65536,0.3345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,32,1,1,131072,0.4278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,2,0.2459
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,4,0.2500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,8,0.2458
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,32,0.2491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,16,0.2511
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,64,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,128,0.2524
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,256,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,512,0.2466
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,1024,0.2515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,2048,0.2522
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,4096,0.3549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,8192,0.2759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,16384,0.2976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,32768,0.3315
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,65536,0.4009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,64,1,1,131072,0.5261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,2,0.2855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,4,0.2817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,8,0.2873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,16,0.2857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,64,0.2864
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,32,0.2870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,128,0.2851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,256,0.2854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,512,0.2834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,1024,0.2839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,2048,0.2923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,4096,0.3976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,16384,0.3539
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,8192,0.3201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,32768,0.4060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,65536,0.5141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,128,1,1,131072,0.7359
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,2,0.3883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,4,0.3899
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,8,0.3912
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,16,0.3916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,32,0.3902
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,64,0.3899
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,128,0.3892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,256,0.3896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,512,0.3899
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,1024,0.3961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,2048,0.4035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,4096,0.5859
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,8192,0.4477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,16384,0.5101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,32768,0.6072
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,65536,0.8068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,2,0.5660
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,256,1,1,131072,1.2313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,4,0.5652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,8,0.5671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,16,0.5677
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,32,0.5669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,64,0.5706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,128,0.5733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,256,0.5728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,512,0.5779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,1024,0.5843
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,2048,0.5989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,4096,0.9426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,8192,0.6827
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,16384,0.8154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,32768,1.0133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,512,1,1,65536,1.3950
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,2,0.9049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,4,0.9018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,8,0.9043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,16,0.9048
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,64,0.9098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,32,0.9036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,128,0.9092
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,256,0.9114
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,512,0.9219
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,1024,0.9358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,2048,0.9649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,4096,1.5594
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,2,0.1731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,8192,1.1276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,32768,1.7674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,4,0.1686
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,16,1024,1,1,16384,1.3825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,8,0.1734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,32,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,16,0.1754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,64,0.1718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,128,0.1739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,256,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,512,0.1752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,1024,0.1693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,2048,0.1735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,4096,0.2653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,8192,0.1880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,16384,0.2022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,32768,0.2055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,65536,0.2387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,2,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1,1,1,131072,0.2707
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,4,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,8,0.1799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,32,0.1857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,16,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,64,0.1802
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,128,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,512,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,256,0.1817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,1024,0.1850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,2048,0.1805
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,4096,0.2841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,8192,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,16384,0.2092
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,32768,0.2234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,131072,0.2969
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,2,1,1,65536,0.2462
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,2,0.1919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,4,0.1865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,8,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,16,0.1920
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,32,0.1906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,64,0.1919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,128,0.1900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,256,0.1909
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,512,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,1024,0.1931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,2048,0.1973
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,4096,0.2847
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,8192,0.2021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,16384,0.2184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,32768,0.2316
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,65536,0.2536
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,2,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,4,0.2007
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,4,1,1,131072,0.2994
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,8,0.2015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,16,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,32,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,64,0.2019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,128,0.1971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,256,0.2014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,512,0.2000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,1024,0.1964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,2048,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,4096,0.2961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,8192,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,16384,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,32768,0.2367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,65536,0.2662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,8,1,1,131072,0.3307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,2,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,4,0.2177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,8,0.2190
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,16,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,32,0.2207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,64,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,128,0.2134
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,256,0.2191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,1024,0.2172
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,512,0.2185
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,4096,0.3197
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,2048,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,8192,0.2372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,16384,0.2449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,32768,0.2607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,65536,0.3058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,16,1,1,131072,0.3736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,2,0.2206
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,8,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,4,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,16,0.2209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,32,0.2153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,64,0.2154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,128,0.2205
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,256,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,512,0.2216
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,1024,0.2161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,2048,0.2249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,4096,0.3225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,8192,0.2381
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,16384,0.2523
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,32768,0.2800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,131072,0.4159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,2,0.2411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,32,1,1,65536,0.3247
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,8,0.2399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,4,0.2409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,16,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,32,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,64,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,128,0.2398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,512,0.2360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,256,0.2378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,1024,0.2420
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,2048,0.2449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,4096,0.3455
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,8192,0.2646
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,16384,0.2880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,32768,0.3167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,65536,0.3911
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,2,0.2733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,4,0.2749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,8,0.2736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,64,1,1,131072,0.5181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,16,0.2732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,64,0.2741
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,128,0.2739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,32,0.2709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,256,0.2758
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,2048,0.2767
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,512,0.2773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,1024,0.2774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,4096,0.3883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,8192,0.3107
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,16384,0.3417
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,32768,0.3910
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,65536,0.5066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,2,0.3711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,128,1,1,131072,0.7211
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,4,0.3723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,8,0.3704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,16,0.3716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,32,0.3708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,64,0.3685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,128,0.3731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,256,0.3749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,512,0.3740
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,1024,0.3752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,2048,0.3850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,4096,0.5657
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,16384,0.4919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,8192,0.4322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,32768,0.5900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,65536,0.7856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,2,0.5354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,256,1,1,131072,1.2129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,8,0.5360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,16,0.5353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,32,0.5357
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,4,0.5346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,64,0.5392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,128,0.5454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,256,0.5454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,512,0.5469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,2048,0.5669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,4096,0.9113
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,1024,0.5526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,8192,0.6524
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,16384,0.7823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,32768,0.9816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,2,0.8367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,512,1,1,65536,1.3730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,8,0.8376
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,4,0.8360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,16,0.8389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,32,0.8400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,64,0.8418
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,128,0.8406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,256,0.8463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,512,0.8549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,2048,0.8916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,4096,1.4943
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,1024,0.8652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,8192,1.0614
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,2,0.1712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,16384,1.3239
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,8,1024,1,1,32768,1.7007
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,4,0.1590
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,8,0.1668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,16,0.1707
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,32,0.1617
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,64,0.1648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,128,0.1696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,256,0.1668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,512,0.1647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,1024,0.1567
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,2048,0.1669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,4096,0.2590
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,8192,0.1810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,16384,0.1950
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,32768,0.2007
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,65536,0.2267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1,1,1,131072,0.2734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,2,0.1681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,4,0.1708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,8,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,16,0.1789
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,32,0.1794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,64,0.1769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,128,0.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,256,0.1739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,512,0.1706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,1024,0.1717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,2048,0.1735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,4096,0.2753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,8192,0.1931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,16384,0.2000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,32768,0.2147
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,65536,0.2414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,2,1,1,131072,0.2809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,2,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,8,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,16,0.1900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,4,0.1862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,32,0.1843
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,64,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,128,0.1802
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,256,0.1818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,512,0.1770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,1024,0.1870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,2048,0.1923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,4096,0.2855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,32768,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,16384,0.2132
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,8192,0.2014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,65536,0.2548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,2,0.1930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,4,1,1,131072,0.2964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,4,0.1918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,8,0.1899
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,16,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,32,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,64,0.1959
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,128,0.1942
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,256,0.1945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,512,0.1896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,1024,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,2048,0.1944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,8192,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,4096,0.2897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,16384,0.2268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,32768,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,65536,0.2713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,8,1,1,131072,0.3212
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,2,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,4,0.2102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,16,0.2115
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,8,0.2068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,32,0.2151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,64,0.2137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,128,0.2148
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,256,0.2090
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,512,0.2145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,1024,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,2048,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,8192,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,4096,0.3165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,16384,0.2471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,32768,0.2594
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,65536,0.3037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,16,1,1,131072,0.3701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,2,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,8,0.2165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,16,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,4,0.2133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,32,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,64,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,128,0.2154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,256,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,512,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,1024,0.2194
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,2048,0.2220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,4096,0.3204
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,8192,0.2366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,16384,0.2498
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,32768,0.2722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,65536,0.3259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,32,1,1,131072,0.4117
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,2,0.2357
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,4,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,8,0.2353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,16,0.2361
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,32,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,64,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,128,0.2355
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,256,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,512,0.2380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,1024,0.2389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,2048,0.2411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,4096,0.3397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,8192,0.2567
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,16384,0.2814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,32768,0.3190
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,65536,0.3872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,64,1,1,131072,0.5192
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,2,0.2681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,4,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,16,0.2665
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,8,0.2631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,32,0.2678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,64,0.2645
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,256,0.2691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,128,0.2693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,512,0.2734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,1024,0.2730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,4096,0.3820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,2048,0.2767
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,8192,0.3007
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,16384,0.3344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,32768,0.3912
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,65536,0.5008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,2,0.3629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,128,1,1,131072,0.7168
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,4,0.3627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,8,0.3607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,16,0.3628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,64,0.3630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,32,0.3602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,128,0.3644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,256,0.3665
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,512,0.3674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,1024,0.3704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,2048,0.3749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,4096,0.5596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,16384,0.4846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,8192,0.4231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,32768,0.5851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,65536,0.7811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,256,1,1,131072,1.2084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,2,0.5235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,4,0.5199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,8,0.5186
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,32,0.5210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,16,0.5186
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,64,0.5230
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,128,0.5274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,256,0.5291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,512,0.5308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,1024,0.5354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,2048,0.5500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,4096,0.8970
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,8192,0.6358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,16384,0.7673
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,32768,0.9681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,2,0.8032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,4,0.8059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,512,1,1,65536,1.3492
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,8,0.8058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,16,0.8076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,32,0.8084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,64,0.8124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,128,0.8114
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,256,0.8117
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,512,0.8219
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,2048,0.8621
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,1024,0.8378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,4096,1.4618
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,2,0.1608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,16384,1.2872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,8192,1.0345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,4,0.1644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,8,0.1607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,16,0.1685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,4,1024,1,1,32768,1.6721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,32,0.1607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,64,0.1610
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,128,0.1654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,256,0.1526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,512,0.1644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,2048,0.1602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,1024,0.1628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,4096,0.2529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,8192,0.1738
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,16384,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,32768,0.1998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,65536,0.2264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1,1,1,131072,0.2652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,2,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,4,0.1679
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,8,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,32,0.1623
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,16,0.1668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,64,0.1742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,128,0.1773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,256,0.1608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,512,0.1706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,1024,0.1691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,4096,0.2556
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,2048,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,8192,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,16384,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,32768,0.2112
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,65536,0.2256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,2,1,1,131072,0.2817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,2,0.1796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,4,0.1731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,8,0.1808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,16,0.1834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,32,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,64,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,128,0.1732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,256,0.1754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,512,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,1024,0.1828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,2048,0.1872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,4096,0.2734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,8192,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,16384,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,32768,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,65536,0.2506
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,4,1,1,131072,0.2918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,2,0.1901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,4,0.1905
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,8,0.1898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,16,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,32,0.1901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,64,0.1866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,128,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,256,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,512,0.1911
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,1024,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,2048,0.1927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,4096,0.2896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,8192,0.2038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,16384,0.2178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,32768,0.2375
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,65536,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,8,1,1,131072,0.3250
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,2,0.2105
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,4,0.2055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,8,0.2070
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,16,0.2094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,64,0.2073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,32,0.2034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,128,0.2024
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,256,0.2103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,512,0.2081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,1024,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,2048,0.2152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,4096,0.3121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,16384,0.2439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,32768,0.2547
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,65536,0.2964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,131072,0.3701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,16,1,1,8192,0.2316
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,2,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,4,0.2063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,16,0.2083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,32,0.2065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,64,0.2111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,128,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,256,0.2123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,8,0.2090
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,512,0.2088
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,1024,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,2048,0.2176
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,4096,0.3120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,8192,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,16384,0.2515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,32768,0.2759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,65536,0.3246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,2,0.2285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,32,1,1,131072,0.4080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,4,0.2317
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,8,0.2272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,16,0.2313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,32,0.2321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,64,0.2321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,256,0.2354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,128,0.2265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,512,0.2335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,1024,0.2364
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,2048,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,4096,0.3389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,8192,0.2550
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,16384,0.2833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,65536,0.3841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,32768,0.3155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,64,1,1,131072,0.5186
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,2,0.2630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,8,0.2631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,4,0.2592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,16,0.2595
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,32,0.2652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,64,0.2629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,128,0.2649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,256,0.2661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,512,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,1024,0.2717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,2048,0.2708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,4096,0.3744
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,8192,0.3036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,16384,0.3346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,32768,0.3883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,65536,0.4986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,128,1,1,131072,0.7149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,2,0.3574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,4,0.3554
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,8,0.3561
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,16,0.3591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,32,0.3575
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,64,0.3595
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,128,0.3587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,256,0.3612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,2048,0.3711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,512,0.3595
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,1024,0.3673
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,4096,0.5560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,8192,0.4175
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,16384,0.4816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,32768,0.5770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,65536,0.7808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,2,0.5111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,4,0.5118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,256,1,1,131072,1.2011
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,8,0.5129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,16,0.5112
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,32,0.5139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,64,0.5153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,128,0.5204
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,256,0.5221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,512,0.5211
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,1024,0.5299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,2048,0.5432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,4096,0.8836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,8192,0.6306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,16384,0.7588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,32768,0.9559
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,2,0.7864
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,4,0.7915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,512,1,1,65536,1.3434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,16,0.7898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,8,0.7896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,32,0.7922
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,64,0.7946
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,128,0.7945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,256,0.7977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,512,0.8070
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,2048,0.8448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,1024,0.8218
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,4096,1.4411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,8192,1.0171
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,16384,1.2653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,2,0.1506
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,4,0.1664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,2,1024,1,1,32768,1.6445
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,8,0.1589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,32,0.1620
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,64,0.1630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,16,0.1509
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,128,0.1607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,256,0.1509
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,512,0.1643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,1024,0.1636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,2048,0.1597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,8192,0.1723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,4096,0.2531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,16384,0.1710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,32768,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,65536,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1,1,1,131072,0.2688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,2,0.1643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,4,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,8,0.1686
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,16,0.1685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,64,0.1668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,32,0.1681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,128,0.1611
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,256,0.1681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,512,0.1687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,1024,0.1643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,2048,0.1686
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,8192,0.1779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,16384,0.1938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,4096,0.2627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,32768,0.1983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,65536,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,2,1,1,131072,0.2819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,2,0.1711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,4,0.1711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,8,0.1709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,16,0.1736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,32,0.1698
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,64,0.1743
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,128,0.1722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,256,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,512,0.1689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,1024,0.1776
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,4096,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,2048,0.1727
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,8192,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,16384,0.1985
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,32768,0.2152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,65536,0.2490
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,4,1,1,131072,0.2904
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,4,0.1863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,8,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,16,0.1793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,2,0.1776
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,32,0.1877
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,64,0.1875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,128,0.1855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,256,0.1835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,512,0.1867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,1024,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,2048,0.1835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,4096,0.2811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,8192,0.1933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,16384,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,32768,0.2337
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,65536,0.2604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,8,1,1,131072,0.3227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,4,0.2046
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,2,0.2051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,8,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,16,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,32,0.2073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,64,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,128,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,256,0.2074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,512,0.2002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,1024,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,4096,0.3051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,2048,0.2135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,8192,0.2264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,16384,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,32768,0.2601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,65536,0.3010
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,16,1,1,131072,0.3599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,2,0.2061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,4,0.2023
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,8,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,16,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,32,0.2034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,128,0.2084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,64,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,256,0.2013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,512,0.2062
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,1024,0.2051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,2048,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,4096,0.3149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,8192,0.2274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,16384,0.2515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,32768,0.2705
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,65536,0.3159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,32,1,1,131072,0.4132
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,2,0.2223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,4,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,8,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,16,0.2284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,32,0.2305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,64,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,128,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,256,0.2313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,512,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,1024,0.2349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,4096,0.3356
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,8192,0.2590
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,2048,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,16384,0.2798
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,32768,0.3098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,65536,0.3848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,64,1,1,131072,0.5094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,4,0.2625
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,8,0.2593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,16,0.2628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,2,0.2610
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,32,0.2562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,64,0.2643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,128,0.2629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,256,0.2597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,512,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,1024,0.2699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,2048,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,4096,0.3773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,8192,0.2960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,16384,0.3349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,32768,0.3858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,65536,0.4926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,2,0.3562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,4,0.3549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,128,1,1,131072,0.7149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,8,0.3557
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,16,0.3523
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,32,0.3550
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,64,0.3552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,128,0.3549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,256,0.3594
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,512,0.3599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,1024,0.3622
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,4096,0.5482
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,2048,0.3712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,8192,0.4203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,16384,0.4779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,32768,0.5782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,2,0.5063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,4,0.5052
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,65536,0.7716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,256,1,1,131072,1.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,8,0.5076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,16,0.5066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,32,0.5078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,64,0.5104
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,128,0.5144
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,256,0.5148
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,512,0.5199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,2048,0.5413
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,1024,0.5262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,4096,0.8833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,8192,0.6233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,16384,0.7592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,32768,0.9510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,2,0.7805
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,512,1,1,65536,1.3361
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,4,0.7829
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,16,0.7819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,8,0.7814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,32,0.7841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,64,0.7890
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,128,0.7857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,256,0.7931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,512,0.7979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,2048,0.8397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,4096,1.4374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,1024,0.8135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,8192,1.0088
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,16384,1.2589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,float16,1,1024,1,1,32768,1.6433
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,8,0.2883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,32,0.2878
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,64,0.2754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,128,0.2788
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,512,0.2853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,1024,0.2888
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,4,0.2799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,2,0.2940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,2048,0.2777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,4096,0.3904
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,16,0.2856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,8192,0.3171
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,256,0.2883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,16384,0.3178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,32768,0.3196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,65536,0.3399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1,1,1,131072,0.4017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,2,0.3155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,4,0.3257
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,8,0.3406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,32,0.3348
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,64,0.3305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,128,0.3412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,256,0.3193
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,512,0.3176
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,1024,0.3377
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,16,0.3336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,2048,0.3430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,4096,0.4664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,8192,0.3568
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,16384,0.3768
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,32768,0.3777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,65536,0.4524
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,2,0.3016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,2,1,1,131072,0.5535
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,4,0.3012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,8,0.3047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,16,0.3029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,32,0.2995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,64,0.2881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,128,0.2965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,256,0.3002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,512,0.2887
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,1024,0.3038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,2048,0.3092
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,4096,0.4291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,8192,0.3248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,16384,0.3185
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,32768,0.3409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,65536,0.3729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,4,1,1,131072,0.4059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,2,0.3111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,4,0.3140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,8,0.3122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,16,0.3027
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,32,0.3091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,64,0.3098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,512,0.3128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,256,0.3091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,1024,0.3158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,128,0.3027
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,4096,0.4330
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,2048,0.3160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,8192,0.3425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,16384,0.3435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,32768,0.3588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,65536,0.3908
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,8,1,1,131072,0.4467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,2,0.3215
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,4,0.3232
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,8,0.3314
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,16,0.3255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,32,0.3276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,64,0.3266
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,128,0.3310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,256,0.3299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,512,0.3231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,1024,0.3281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,2048,0.3400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,4096,0.4602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,8192,0.3636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,16384,0.3670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,32768,0.3873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,65536,0.4208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,16,1,1,131072,0.4815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,2,0.3290
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,4,0.3346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,8,0.3314
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,16,0.3325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,64,0.3316
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,32,0.3346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,256,0.3332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,512,0.3286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,128,0.3281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,1024,0.3370
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,2048,0.3477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,4096,0.4701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,8192,0.3747
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,16384,0.3785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,32768,0.3953
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,65536,0.4495
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,32,1,1,131072,0.5315
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,2,0.3727
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,4,0.3596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,16,0.3579
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,8,0.3620
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,32,0.3581
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,64,0.3605
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,256,0.3687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,128,0.3537
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,512,0.3624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,1024,0.3715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,4096,0.5037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,8192,0.4073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,2048,0.3730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,16384,0.4111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,32768,0.4517
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,65536,0.5159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,64,1,1,131072,0.6454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,4,0.4428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,2,0.4401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,8,0.4403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,16,0.4436
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,32,0.4480
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,64,0.4586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,128,0.4395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,256,0.4517
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,512,0.4489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,1024,0.4586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,2048,0.4656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,4096,0.6031
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,8192,0.5014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,16384,0.5280
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,32768,0.5829
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,65536,0.6925
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,128,1,1,131072,0.9182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,4,0.5844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,2,0.5836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,8,0.5860
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,16,0.6156
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,32,0.6201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,64,0.5911
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,128,0.5898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,256,0.5996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,512,0.6180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,1024,0.6289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,2048,0.6454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,4096,0.8810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,8192,0.7060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,16384,0.7588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,32768,0.8586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,65536,1.0520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,256,1,1,131072,1.4718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,2,0.8643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,4,0.8606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,8,0.9447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,16,0.8613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,32,0.8751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,64,0.8650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,128,0.8756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,2048,0.9735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,256,0.8944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,512,0.9249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,1024,0.9210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,4096,1.4312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,8192,1.0821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,16384,1.1866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,32768,1.3973
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,512,1,1,65536,1.7875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,8,1.4661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,16,1.4963
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,32,1.6678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,4,1.5329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,2,1.4759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,64,1.4757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,128,1.4821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,256,1.5054
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,512,1.5277
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,2048,1.6867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,4096,2.4640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,1024,1.5995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,16384,2.0880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,8192,1.8784
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,128,1024,1,1,32768,2.5008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,2,0.2732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,4,0.2583
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,8,0.2485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,32,0.2463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,128,0.2575
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,16,0.2574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,64,0.2568
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,512,0.2781
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,1024,0.2609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,2048,0.2483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,256,0.2567
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,4096,0.3632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,8192,0.2844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,16384,0.2876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,32768,0.2921
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,65536,0.3258
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1,1,1,131072,0.3670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,2,0.2895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,8,0.2881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,4,0.3225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,16,0.3047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,32,0.3072
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,64,0.3034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,256,0.3082
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,128,0.3094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,512,0.2865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,1024,0.2888
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,2048,0.3105
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,4096,0.4371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,8192,0.3341
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,16384,0.3399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,32768,0.3687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,65536,0.4338
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,2,1,1,131072,0.5225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,2,0.2666
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,4,0.2825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,8,0.2794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,16,0.2807
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,32,0.2767
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,128,0.2838
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,64,0.2825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,256,0.2697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,512,0.2662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,1024,0.2785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,2048,0.2828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,4096,0.3995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,16384,0.3133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,8192,0.3106
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,32768,0.3102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,65536,0.3436
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,4,1,1,131072,0.3845
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,2,0.2901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,4,0.2858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,8,0.2869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,16,0.2856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,32,0.2775
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,64,0.2877
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,128,0.2825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,256,0.2775
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,512,0.2885
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,1024,0.2907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,2048,0.2877
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,4096,0.4101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,8192,0.3092
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,16384,0.3187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,32768,0.3300
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,65536,0.3533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,8,1,1,131072,0.4234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,2,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,4,0.3008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,8,0.3011
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,16,0.2966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,32,0.3023
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,64,0.2960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,128,0.2948
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,256,0.3073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,512,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,1024,0.3032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,2048,0.3092
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,4096,0.4226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,8192,0.3349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,16384,0.3374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,32768,0.3492
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,65536,0.3991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,16,1,1,131072,0.4613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,2,0.3035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,4,0.3011
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,8,0.2969
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,16,0.3022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,32,0.2988
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,64,0.2959
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,128,0.3080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,512,0.3022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,1024,0.3032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,2048,0.3128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,256,0.3045
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,8192,0.3372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,16384,0.3490
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,4096,0.4376
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,32768,0.3724
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,65536,0.4177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,32,1,1,131072,0.5013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,4,0.3229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,2,0.3228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,8,0.3143
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,32,0.3199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,16,0.3169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,64,0.3255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,128,0.3203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,256,0.3212
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,512,0.3261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,1024,0.3265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,2048,0.3285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,8192,0.3653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,4096,0.4581
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,16384,0.3815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,32768,0.4128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,65536,0.4758
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,64,1,1,131072,0.6107
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,2,0.3839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,4,0.3784
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,8,0.3844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,32,0.3793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,16,0.3842
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,128,0.3910
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,64,0.3820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,512,0.3893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,256,0.3961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,1024,0.3897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,2048,0.4082
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,4096,0.5367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,16384,0.4648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,8192,0.4469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,32768,0.5260
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,65536,0.6373
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,128,1,1,131072,0.8444
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,2,0.4956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,4,0.4919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,16,0.5178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,8,0.4914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,64,0.4913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,32,0.4915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,128,0.4955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,512,0.5087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,256,0.5026
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,1024,0.5227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,2048,0.5432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,4096,0.7812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,8192,0.6047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,16384,0.6551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,32768,0.7584
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,65536,0.9551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,256,1,1,131072,1.3761
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,4,0.7831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,2,0.7130
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,8,0.7052
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,32,0.7114
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,16,0.7067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,64,0.7534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,128,0.7194
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,512,0.7566
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,1024,0.7761
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,256,0.7310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,2048,0.8160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,4096,1.2635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,16384,1.0346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,32768,1.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,2,1.1950
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,65536,1.6131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,4,1.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,16,1.1826
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,32,1.1908
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,8,1.1800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,512,1,1,8192,0.9238
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,64,1.2514
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,128,1.3152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,256,1.2177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,512,1.2425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,1024,1.3082
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,2048,1.4060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,4096,2.1571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,8192,1.5874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,2,0.2404
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,16384,1.7952
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,4,0.2379
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,8,0.2394
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,16,0.2414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,64,1024,1,1,32768,2.2316
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,64,0.2291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,32,0.2387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,128,0.2361
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,256,0.2419
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,512,0.2409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,1024,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,2048,0.2316
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,4096,0.3458
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,8192,0.2596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,16384,0.2609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,32768,0.2762
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,65536,0.3120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1,1,1,131072,0.3512
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,2,0.2902
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,4,0.2689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,8,0.2836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,16,0.2884
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,32,0.2849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,64,0.2901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,128,0.2862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,256,0.2846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,512,0.2839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,1024,0.2714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,4096,0.4188
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,8192,0.3119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,2048,0.2721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,32768,0.3493
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,16384,0.3170
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,65536,0.4105
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,2,1,1,131072,0.5196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,2,0.2494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,8,0.2619
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,4,0.2602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,16,0.2593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,64,0.2628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,32,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,128,0.2632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,512,0.2498
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,256,0.2602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,1024,0.2687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,2048,0.2655
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,4096,0.3782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,8192,0.2870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,16384,0.2819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,32768,0.3019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,65536,0.3198
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,4,1,1,131072,0.3779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,2,0.2714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,8,0.2646
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,16,0.2737
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,4,0.2688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,32,0.2611
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,64,0.2687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,128,0.2621
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,256,0.2701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,512,0.2736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,1024,0.2597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,4096,0.3939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,8192,0.2951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,2048,0.2708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,16384,0.3037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,32768,0.3196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,65536,0.3405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,8,1,1,131072,0.4079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,2,0.2793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,4,0.2851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,8,0.2844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,16,0.2814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,32,0.2866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,64,0.2825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,128,0.2877
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,256,0.2809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,512,0.2793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,1024,0.2876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,4096,0.4135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,2048,0.2923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,8192,0.3229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,16384,0.3250
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,32768,0.3393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,65536,0.3709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,16,1,1,131072,0.4401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,2,0.2822
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,8,0.2826
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,4,0.2792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,16,0.2837
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,64,0.2816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,32,0.2832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,128,0.2754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,256,0.2773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,512,0.2862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,1024,0.2865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,2048,0.2926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,4096,0.4233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,8192,0.3229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,16384,0.3287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,32768,0.3454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,65536,0.3934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,32,1,1,131072,0.4882
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,4,0.2952
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,2,0.2965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,8,0.2973
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,16,0.2970
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,32,0.2938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,64,0.2863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,128,0.2870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,256,0.2965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,512,0.2967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,1024,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,2048,0.3084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,4096,0.4370
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,16384,0.3451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,8192,0.3413
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,32768,0.3814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,65536,0.4547
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,2,0.3454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,64,1,1,131072,0.5830
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,4,0.3429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,8,0.3470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,32,0.3439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,16,0.3392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,64,0.3390
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,128,0.3452
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,256,0.3486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,512,0.3475
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,1024,0.3608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,2048,0.3678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,4096,0.4979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,8192,0.4044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,32768,0.4875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,16384,0.4312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,65536,0.5970
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,2,0.4325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,128,1,1,131072,0.8096
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,8,0.4353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,4,0.4472
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,32,0.4337
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,16,0.4297
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,64,0.4341
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,128,0.4369
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,256,0.4578
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,512,0.4619
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,1024,0.4599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,2048,0.4865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,4096,0.7300
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,8192,0.5408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,16384,0.5954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,32768,0.7005
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,65536,0.8941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,256,1,1,131072,1.3136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,8,0.6287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,4,0.5826
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,2,0.5916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,16,0.5866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,32,0.5906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,64,0.5910
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,128,0.5961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,256,0.6043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,512,0.6331
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,1024,0.6393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,4096,1.1377
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,8192,0.7993
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,2048,0.6921
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,16384,0.9089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,32768,1.1152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,512,1,1,65536,1.5068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,2,0.9353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,4,1.0249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,8,0.9468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,16,0.9508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,32,0.9580
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,64,0.9783
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,128,0.9684
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,256,0.9853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,512,1.0077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,1024,1.0664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,2048,1.1606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,4096,1.9372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,16384,1.5735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,8192,1.3593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,2,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,4,0.2047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,32,1024,1,1,32768,1.9624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,8,0.2187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,16,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,32,0.2213
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,64,0.2095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,128,0.2090
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,256,0.2043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,512,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,2048,0.2089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,4096,0.3398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,1024,0.2200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,16384,0.2403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,8192,0.2520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,32768,0.2577
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,65536,0.2715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,2,0.2728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1,1,1,131072,0.3422
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,4,0.2463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,8,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,64,0.2653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,32,0.2518
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,128,0.2511
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,16,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,256,0.2573
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,1024,0.2553
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,2048,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,512,0.2387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,4096,0.2845
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,8192,0.2782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,16384,0.2855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,32768,0.2993
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,65536,0.3844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,2,1,1,131072,0.5137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,2,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,4,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,8,0.2335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,16,0.2299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,32,0.2228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,64,0.2326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,512,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,256,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,1024,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,128,0.2368
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,2048,0.2467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,4096,0.3487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,16384,0.2729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,8192,0.2607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,131072,0.3576
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,32768,0.2753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,4,1,1,65536,0.3230
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,2,0.2460
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,4,0.2436
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,8,0.2378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,16,0.2484
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,32,0.2334
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,64,0.2326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,256,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,128,0.2509
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,512,0.2456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,1024,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,2048,0.2550
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,4096,0.3716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,8192,0.2897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,16384,0.2759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,32768,0.3095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,65536,0.3397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,8,1,1,131072,0.4007
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,2,0.2583
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,4,0.2583
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,8,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,16,0.2498
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,64,0.2604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,32,0.2702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,128,0.2739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,256,0.2664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,512,0.2591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,1024,0.2694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,2048,0.2758
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,4096,0.3926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,16384,0.3152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,8192,0.3030
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,65536,0.3726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,32768,0.3357
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,16,1,1,131072,0.4348
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,2,0.2582
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,4,0.2567
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,16,0.2572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,8,0.2510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,128,0.2687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,64,0.2580
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,32,0.2678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,256,0.2667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,512,0.2641
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,1024,0.2738
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,2048,0.2831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,4096,0.3958
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,8192,0.3164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,32768,0.3405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,16384,0.3177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,65536,0.3866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,32,1,1,131072,0.4717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,2,0.2785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,4,0.2749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,8,0.2665
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,16,0.2777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,32,0.2707
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,64,0.2630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,128,0.2820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,256,0.2753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,512,0.2914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,1024,0.2914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,2048,0.2904
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,4096,0.4255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,8192,0.3283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,16384,0.3427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,32768,0.3731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,65536,0.4377
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,64,1,1,131072,0.5726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,2,0.3224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,4,0.3319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,8,0.3239
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,16,0.3276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,32,0.3187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,64,0.3189
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,128,0.3176
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,256,0.3395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,512,0.3324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,2048,0.3503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,1024,0.3363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,4096,0.4803
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,8192,0.3888
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,16384,0.4093
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,32768,0.4631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,131072,0.7899
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,128,1,1,65536,0.5763
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,2,0.4063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,4,0.4286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,16,0.4058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,8,0.4161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,32,0.4035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,64,0.4036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,128,0.4131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,256,0.4124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,512,0.4275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,1024,0.4359
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,2048,0.4537
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,4096,0.6873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,16384,0.5630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,8192,0.5124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,32768,0.6630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,65536,0.8581
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,2,0.5358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,256,1,1,131072,1.2941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,8,0.5757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,4,0.5335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,32,0.5356
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,16,0.5378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,64,0.5390
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,128,0.5397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,512,0.5942
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,256,0.5506
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,1024,0.6025
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,2048,0.6373
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,4096,1.0818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,8192,0.7492
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,16384,0.8505
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,32768,1.0585
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,512,1,1,65536,1.4489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,2,0.8270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,4,0.9073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,8,0.8291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,16,0.8311
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,32,0.8456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,64,0.8410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,128,0.8406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,256,0.8635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,512,0.8885
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,2048,1.0347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,1024,0.9454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,4096,1.8213
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,8192,1.2366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,32768,1.8545
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,16,1024,1,1,16384,1.4451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,2,0.2145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,4,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,8,0.1954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,16,0.1993
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,32,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,128,0.1981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,64,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,512,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,256,0.1993
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,1024,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,2048,0.1961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,8192,0.2268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,16384,0.2284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,4096,0.3228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,32768,0.2439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,65536,0.2630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1,1,1,131072,0.3122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,2,0.2454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,8,0.2327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,4,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,64,0.2512
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,16,0.2492
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,128,0.2426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,32,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,256,0.2479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,512,0.2464
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,1024,0.2367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,4096,0.2656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,8192,0.2731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,16384,0.2758
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,2048,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,65536,0.3676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,131072,0.5009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,2,0.2199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,8,0.2265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,16,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,4,0.2325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,32,0.2240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,2,1,1,32768,0.2918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,128,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,256,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,64,0.2301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,1024,0.2342
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,512,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,2048,0.2306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,4096,0.3449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,16384,0.2489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,32768,0.2734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,65536,0.3004
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,8192,0.2489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,16,0.2258
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,2,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,4,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,8,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,4,1,1,131072,0.3730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,128,0.2229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,256,0.2261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,32,0.2371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,64,0.2378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,512,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,1024,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,2048,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,4096,0.3597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,16384,0.2656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,65536,0.3346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,32768,0.2995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,8192,0.2516
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,8,1,1,131072,0.3956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,2,0.2494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,4,0.2468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,8,0.2549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,64,0.2529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,16,0.2425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,1024,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,512,0.2526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,256,0.2570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,32,0.2529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,2048,0.2655
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,128,0.2439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,4096,0.3826
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,16384,0.2993
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,32768,0.3314
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,8192,0.2995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,65536,0.3652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,16,1,1,131072,0.4339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,4,0.2413
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,2,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,8,0.2501
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,16,0.2425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,128,0.2467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,32,0.2514
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,256,0.2549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,512,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,1024,0.2643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,64,0.2447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,2048,0.2752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,4096,0.4052
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,8192,0.3109
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,16384,0.3137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,32768,0.3411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,65536,0.3801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,32,1,1,131072,0.4742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,2,0.2538
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,32,0.2596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,128,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,64,0.2594
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,256,0.2737
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,8,0.2568
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,16,0.2532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,4,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,512,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,4096,0.4194
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,2048,0.2931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,1024,0.2857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,8192,0.3259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,65536,0.4315
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,16384,0.3353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,32768,0.3639
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,64,1,1,131072,0.5646
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,2,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,4,0.3026
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,32,0.2920
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,64,0.3019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,16,0.3099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,128,0.3035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,256,0.3156
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,8,0.3062
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,512,0.3227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,1024,0.3281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,8192,0.3764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,4096,0.4722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,2048,0.3350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,65536,0.5649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,32768,0.4552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,131072,0.7846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,4,0.3820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,2,0.3876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,128,1,1,16384,0.4041
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,32,0.3981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,8,0.3872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,16,0.3865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,128,0.3901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,256,0.4075
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,64,0.3862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,512,0.4036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,1024,0.4162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,8192,0.4979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,2048,0.4376
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,4096,0.6714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,16384,0.5511
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,65536,0.8441
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,32768,0.6439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,256,1,1,131072,1.2705
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,2,0.5111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,4,0.5119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,8,0.5500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,32,0.5067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,64,0.5135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,256,0.5240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,16,0.5089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,128,0.5132
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,512,0.5742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,1024,0.5579
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,2048,0.6126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,4096,1.0542
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,8192,0.7167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,32768,1.0305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,2,0.7820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,65536,1.4153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,512,1,1,16384,0.8199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,4,0.8515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,8,0.9476
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,16,0.7756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,32,0.7818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,64,0.7878
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,256,0.8030
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,512,0.8308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,1024,0.8971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,128,0.7986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,2048,0.9867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,4096,1.7477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,16384,1.3874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,8192,1.1767
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,4,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,8,1024,1,1,32768,1.8001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,2,0.1985
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,16,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,32,0.1954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,8,0.1982
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,128,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,64,0.1983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,256,0.1924
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,512,0.2015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,1024,0.1918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,2048,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,4096,0.3159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,8192,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,16384,0.2313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,32768,0.2398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,65536,0.2547
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1,1,1,131072,0.2980
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,8,0.2408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,4,0.2370
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,2,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,16,0.2449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,32,0.2447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,64,0.2317
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,128,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,256,0.2293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,1024,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,512,0.2409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,2048,0.2425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,8192,0.2645
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,4096,0.3759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,65536,0.3635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,16384,0.2609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,32768,0.2905
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,2,0.2256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,4,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,8,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,16,0.2264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,64,0.2319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,2,1,1,131072,0.4637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,32,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,256,0.2251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,128,0.2123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,512,0.2252
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,1024,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,2048,0.2319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,8192,0.2460
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,4096,0.3511
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,16384,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,65536,0.2961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,2,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,32768,0.2652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,4,1,1,131072,0.3621
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,4,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,8,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,16,0.2223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,64,0.2243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,32,0.2347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,128,0.2354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,256,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,1024,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,4096,0.3448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,512,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,8192,0.2533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,2048,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,32768,0.2965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,65536,0.3345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,16384,0.2640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,2,0.2386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,8,1,1,131072,0.3930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,4,0.2472
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,8,0.2409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,16,0.2447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,32,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,64,0.2480
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,128,0.2490
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,256,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,512,0.2595
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,1024,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,2048,0.2514
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,4096,0.3800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,8192,0.2955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,16384,0.2954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,65536,0.3549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,32768,0.3267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,2,0.2442
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,16,1,1,131072,0.4351
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,4,0.2387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,8,0.2434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,16,0.2468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,64,0.2467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,32,0.2426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,256,0.2433
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,128,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,512,0.2396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,1024,0.2504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,2048,0.2749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,4096,0.4035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,8192,0.3033
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,65536,0.3770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,16384,0.3114
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,32768,0.3333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,32,1,1,131072,0.4652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,4,0.2502
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,2,0.2547
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,8,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,16,0.2610
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,32,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,64,0.2529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,256,0.2576
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,512,0.2618
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,128,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,1024,0.2814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,2048,0.2901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,8192,0.3142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,16384,0.3300
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,4096,0.4154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,32768,0.3604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,65536,0.4305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,64,1,1,131072,0.5626
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,4,0.2899
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,2,0.2989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,8,0.2874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,16,0.2928
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,32,0.3021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,64,0.3124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,256,0.2940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,128,0.2970
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,512,0.3162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,1024,0.3245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,2048,0.3294
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,4096,0.4653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,8192,0.3700
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,16384,0.3991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,32768,0.4537
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,65536,0.5607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,128,1,1,131072,0.7800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,2,0.3751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,4,0.3796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,32,0.3671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,8,0.4021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,128,0.3823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,64,0.3745
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,16,0.3861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,256,0.3893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,512,0.3940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,2048,0.4262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,1024,0.4081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,4096,0.6669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,16384,0.5379
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,8192,0.4886
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,32768,0.6403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,65536,0.8406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,4,0.4936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,2,0.5653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,256,1,1,131072,1.2677
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,8,0.4959
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,16,0.4935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,64,0.5345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,32,0.4945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,128,0.5241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,256,0.5111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,512,0.5326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,1024,0.5471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,2048,0.5940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,4096,1.0416
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,8192,0.7037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,16384,0.8058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,32768,1.0161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,2,0.8888
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,512,1,1,65536,1.4102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,4,0.7566
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,8,0.7477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,16,0.7479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,32,0.7556
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,64,0.7621
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,128,0.7665
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,256,0.8005
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,512,0.8073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,2048,0.9528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,1024,0.8565
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,4096,1.7273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,8192,1.1591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,2,0.1896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,16384,1.3580
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,4,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,8,0.1974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,4,1024,1,1,32768,1.7637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,16,0.1897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,32,0.2000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,64,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,128,0.1875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,256,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,1024,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,2048,0.2067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,4096,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,512,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,8192,0.2256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,16384,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,32768,0.2270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,65536,0.2590
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1,1,1,131072,0.3043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,4,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,2,0.2449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,8,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,16,0.2415
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,32,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,128,0.2366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,64,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,256,0.2364
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,512,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,1024,0.2431
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,2048,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,4096,0.3700
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,8192,0.2466
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,16384,0.2665
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,32768,0.2974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,65536,0.3552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,2,1,1,131072,0.4575
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,2,0.2243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,4,0.2205
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,8,0.2212
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,16,0.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,32,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,64,0.2187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,128,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,256,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,512,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,1024,0.2262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,2048,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,4096,0.3378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,8192,0.2446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,16384,0.2496
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,32768,0.2529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,65536,0.2919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,2,0.2342
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,4,1,1,131072,0.3531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,4,0.2306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,8,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,16,0.2266
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,32,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,64,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,128,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,512,0.2368
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,256,0.2350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,1024,0.2308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,2048,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,4096,0.3452
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,8192,0.2559
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,16384,0.2533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,32768,0.2893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,65536,0.3302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,8,1,1,131072,0.3894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,8,0.2439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,4,0.2469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,2,0.2387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,16,0.2466
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,32,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,64,0.2469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,128,0.2514
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,256,0.2472
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,512,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,1024,0.2404
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,2048,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,4096,0.3800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,8192,0.2754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,16384,0.2885
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,32768,0.3244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,65536,0.3628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,16,1,1,131072,0.4267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,2,0.2327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,8,0.2426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,16,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,4,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,32,0.2440
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,64,0.2450
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,128,0.2408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,256,0.2386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,512,0.2343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,1024,0.2489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,2048,0.2676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,4096,0.4040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,8192,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,16384,0.3139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,32768,0.3309
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,65536,0.3776
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,32,1,1,131072,0.4588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,2,0.2600
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,4,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,8,0.2644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,16,0.2499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,32,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,64,0.2513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,128,0.2457
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,256,0.2526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,512,0.2527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,1024,0.2751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,2048,0.2873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,4096,0.4161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,8192,0.3188
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,16384,0.3301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,32768,0.3618
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,65536,0.4247
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,2,0.3018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,64,1,1,131072,0.5555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,4,0.3038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,16,0.2926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,8,0.2906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,32,0.2865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,64,0.2859
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,128,0.3036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,256,0.2930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,512,0.3160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,4096,0.4666
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,1024,0.3235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,2048,0.3280
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,8192,0.3722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,16384,0.3942
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,32768,0.4452
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,65536,0.5605
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,128,1,1,131072,0.7768
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,2,0.3651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,8,0.3585
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,4,0.3858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,16,0.3727
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,32,0.3736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,64,0.3987
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,128,0.3797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,256,0.4046
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,512,0.3945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,2048,0.4267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,1024,0.4005
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,4096,0.6589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,8192,0.4846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,32768,0.6367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,16384,0.5364
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,65536,0.8331
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,2,0.5284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,4,0.4916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,8,0.4886
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,256,1,1,131072,1.2594
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,32,0.4861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,16,0.5572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,64,0.4933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,128,0.5647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,256,0.5246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,512,0.5183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,2048,0.5876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,1024,0.5380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,4096,1.0384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,8192,0.6931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,16384,0.8043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,32768,1.0092
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,2,0.7400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,8,0.7332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,4,0.7354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,512,1,1,65536,1.4010
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,16,0.7358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,64,0.7481
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,32,0.8738
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,128,0.7532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,256,0.7768
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,512,0.7911
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,2048,0.9481
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,4096,1.7086
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,1024,0.8354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,8192,1.1398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,16384,1.3512
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,2,0.1892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,2,1024,1,1,32768,1.7593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,4,0.1932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,8,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,32,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,16,0.1954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,64,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,128,0.1875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,256,0.1880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,512,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,2048,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,8192,0.2209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,1024,0.2020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,4096,0.3123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,16384,0.2264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,32768,0.2279
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,65536,0.2515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1,1,1,131072,0.2962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,2,0.2474
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,4,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,8,0.2435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,16,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,32,0.2449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,64,0.2191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,128,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,256,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,512,0.2473
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,1024,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,2048,0.2320
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,4096,0.3616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,16384,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,8192,0.2429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,32768,0.3019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,131072,0.4798
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,2,1,1,65536,0.3494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,2,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,4,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,8,0.2173
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,32,0.2043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,16,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,64,0.2223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,256,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,128,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,512,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,1024,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,2048,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,4096,0.3265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,8192,0.2402
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,16384,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,32768,0.2549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,65536,0.2981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,4,1,1,131072,0.3576
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,2,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,4,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,16,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,8,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,32,0.2225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,64,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,128,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,256,0.2286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,1024,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,512,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,2048,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,8192,0.2552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,4096,0.3449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,16384,0.2526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,32768,0.2786
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,65536,0.3271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,2,0.2366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,8,1,1,131072,0.3855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,4,0.2364
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,8,0.2480
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,16,0.2436
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,32,0.2426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,64,0.2467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,128,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,256,0.2374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,512,0.2459
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,1024,0.2364
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,2048,0.2558
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,4096,0.3763
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,8192,0.2764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,16384,0.2920
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,32768,0.3184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,65536,0.3534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,16,1,1,131072,0.4305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,2,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,4,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,8,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,16,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,32,0.2408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,64,0.2425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,256,0.2367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,128,0.2390
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,512,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,1024,0.2476
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,2048,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,4096,0.3940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,8192,0.2996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,16384,0.2989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,32768,0.3296
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,65536,0.3821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,32,1,1,131072,0.4574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,2,0.2490
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,4,0.2486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,16,0.2492
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,8,0.2447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,32,0.2398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,64,0.2596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,128,0.2392
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,256,0.2499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,512,0.2512
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,1024,0.2776
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,2048,0.2837
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,4096,0.4093
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,8192,0.3089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,16384,0.3305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,32768,0.3555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,65536,0.4319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,64,1,1,131072,0.5584
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,2,0.2858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,4,0.2884
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,16,0.2828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,8,0.2757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,32,0.3068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,64,0.2781
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,128,0.2871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,256,0.2889
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,1024,0.3186
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,512,0.3195
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,2048,0.3240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,4096,0.4605
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,8192,0.3732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,16384,0.3855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,32768,0.4502
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,65536,0.5581
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,128,1,1,131072,0.7698
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,2,0.3726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,4,0.3765
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,8,0.3718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,16,0.3657
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,32,0.3546
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,64,0.3688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,128,0.3774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,256,0.3907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,512,0.3906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,1024,0.4029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,2048,0.4255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,4096,0.6602
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,16384,0.5335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,8192,0.4828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,32768,0.6345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,65536,0.8326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,256,1,1,131072,1.2581
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,2,0.4821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,8,0.4823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,16,0.4855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,4,0.4836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,64,0.4881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,32,0.4844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,128,0.5190
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,256,0.4966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,512,0.5135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,2048,0.5840
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,4096,1.0287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,1024,0.5360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,8192,0.6913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,16384,0.8002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,32768,1.0084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,512,1,1,65536,1.3894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,2,0.7333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,4,0.8884
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,16,0.7281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,8,0.7291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,32,0.7354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,64,0.8876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,128,0.7447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,256,0.7572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,2048,0.9345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,512,0.7861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,1024,0.8351
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,4096,1.6968
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,16384,1.3434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,8192,1.1287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,2,0.2550
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,fp8_block,1,1024,1,1,32768,1.7536
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,4,0.2647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,8,0.2685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,16,0.2586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,32,0.2588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,64,0.2599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,128,0.2621
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,512,0.2611
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,256,0.2766
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,1024,0.2652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,2048,0.2694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,4096,0.3780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,8192,0.2856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,16384,0.2886
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,32768,0.3284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,65536,0.3164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1,1,1,131072,0.3664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,2,0.3526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,4,0.3587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,8,0.3327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,16,0.3446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,32,0.3479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,64,0.3499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,128,0.3355
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,256,0.3527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,512,0.3411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,1024,0.3573
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,2048,0.3340
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,4096,0.3624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,8192,0.3721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,16384,0.3856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,32768,0.3945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,65536,0.4734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,2,1,1,131072,0.5768
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,2,0.2927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,4,0.2828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,8,0.2869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,16,0.2875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,32,0.2789
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,64,0.2883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,128,0.2878
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,256,0.2925
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,512,0.2839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,1024,0.2907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,2048,0.2869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,4096,0.4030
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,8192,0.3029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,16384,0.3150
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,32768,0.3252
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,65536,0.3578
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,4,1,1,131072,0.3937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,2,0.2931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,4,0.3009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,8,0.3045
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,16,0.3016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,64,0.3024
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,32,0.3021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,128,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,256,0.2907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,512,0.3003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,1024,0.2915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,2048,0.3022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,8192,0.3271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,16384,0.3306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,4096,0.4251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,32768,0.3438
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,65536,0.3623
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,8,1,1,131072,0.4270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,2,0.3126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,4,0.3214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,8,0.3203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,16,0.3208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,32,0.3240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,64,0.3240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,128,0.3124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,256,0.3166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,512,0.3169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,1024,0.3214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,4096,0.4483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,2048,0.3235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,32768,0.3714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,8192,0.3412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,16384,0.3530
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,65536,0.4032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,16,1,1,131072,0.4667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,4,0.3371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,8,0.3350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,2,0.3375
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,16,0.3374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,32,0.3307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,64,0.3334
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,128,0.3290
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,256,0.3388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,512,0.3390
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,2048,0.3409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,1024,0.3369
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,4096,0.4700
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,8192,0.3615
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,16384,0.3725
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,32768,0.3907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,65536,0.4470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,32,1,1,131072,0.5349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,4,0.3899
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,8,0.3861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,2,0.3886
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,16,0.3922
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,32,0.3906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,64,0.3841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,128,0.3901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,256,0.3907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,512,0.3921
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,1024,0.3873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,4096,0.5214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,2048,0.3990
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,8192,0.4285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,16384,0.4350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,32768,0.4724
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,65536,0.5422
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,2,0.4913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,64,1,1,131072,0.6726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,4,0.4916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,8,0.4944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,16,0.4919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,32,0.4870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,64,0.4946
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,128,0.4951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,256,0.4924
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,512,0.4943
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,1024,0.4940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,2048,0.5050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,4096,0.6344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,8192,0.5359
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,16384,0.5686
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,32768,0.6224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,65536,0.7281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,128,1,1,131072,0.9457
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,4,0.7332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,2,0.7321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,8,0.7296
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,16,0.7340
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,32,0.7356
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,64,0.7385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,128,0.7433
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,256,0.7477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,512,0.7448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,2048,0.7572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,1024,0.7512
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,4096,0.9926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,8192,0.8217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,16384,0.8709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,32768,0.9779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,65536,1.1594
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,4,1.1748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,256,1,1,131072,1.5918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,2,1.1748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,16,1.1763
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,8,1.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,32,1.1798
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,64,1.1983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,128,1.1905
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,256,1.1905
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,512,1.1918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,2048,1.2189
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,1024,1.2012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,4096,1.6506
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,8192,1.3323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,16384,1.4574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,32768,1.6396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,512,1,1,65536,2.0156
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,2,1.9940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,4,1.9995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,8,1.9950
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,16,1.9957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,32,2.0015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,64,2.0365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,128,2.0016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,256,2.0026
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,512,2.0125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,1024,2.0378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,2048,2.0658
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,4096,2.8178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,16384,2.5274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,8192,2.2686
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,2,0.2128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,4,0.2210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,128,1024,1,1,32768,2.8819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,8,0.2220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,16,0.2244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,32,0.2246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,64,0.2135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,128,0.2136
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,512,0.2237
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,1024,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,256,0.2200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,2048,0.2231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,4096,0.3411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,8192,0.2498
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,16384,0.2389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,32768,0.2504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,131072,0.3226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,4,0.3038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1,1,1,65536,0.2814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,2,0.2979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,8,0.3036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,16,0.3036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,32,0.2852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,64,0.2848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,128,0.2986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,256,0.3027
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,512,0.3018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,1024,0.3083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,2048,0.3037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,4096,0.3176
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,8192,0.3059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,16384,0.3202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,32768,0.3588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,65536,0.4236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,2,1,1,131072,0.5383
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,4,0.2454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,2,0.2449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,8,0.2322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,16,0.2407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,32,0.2373
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,64,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,256,0.2421
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,128,0.2462
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,512,0.2452
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,1024,0.2320
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,2048,0.2451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,4096,0.3564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,8192,0.2576
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,32768,0.2815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,16384,0.2733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,65536,0.3076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,4,1,1,131072,0.3572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,2,0.2425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,4,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,16,0.2510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,8,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,32,0.2500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,64,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,128,0.2468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,256,0.2505
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,512,0.2415
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,1024,0.2478
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,2048,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,4096,0.3731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,8192,0.2760
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,16384,0.2776
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,32768,0.2913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,65536,0.3183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,8,1,1,131072,0.3677
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,2,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,4,0.2636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,8,0.2719
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,16,0.2735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,32,0.2708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,64,0.2693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,128,0.2614
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,256,0.2679
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,512,0.2616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,1024,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,2048,0.2716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,4096,0.3987
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,8192,0.2988
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,16384,0.3005
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,32768,0.3102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,65536,0.3534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,16,1,1,131072,0.4151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,2,0.2715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,4,0.2714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,8,0.2770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,16,0.2741
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,32,0.2733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,64,0.2660
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,128,0.2667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,256,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,512,0.2722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,1024,0.2723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,2048,0.2790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,8192,0.2959
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,16384,0.3122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,32768,0.3346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,4096,0.3949
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,131072,0.4661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,32,1,1,65536,0.3806
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,2,0.2992
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,4,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,8,0.2955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,16,0.2991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,32,0.2949
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,64,0.2990
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,128,0.3012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,256,0.3014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,512,0.2996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,1024,0.3051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,2048,0.2976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,4096,0.4316
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,8192,0.3310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,16384,0.3477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,2,0.3553
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,131072,0.5784
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,32768,0.3815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,64,1,1,65536,0.4476
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,4,0.3505
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,8,0.3491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,16,0.3545
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,32,0.3531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,64,0.3533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,256,0.3545
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,128,0.3552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,512,0.3532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,2048,0.3558
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,1024,0.3508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,4096,0.4927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,8192,0.3995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,16384,0.4231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,32768,0.4774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,65536,0.5841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,2,0.4800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,8,0.4796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,4,0.4790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,128,1,1,131072,0.7959
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,16,0.4787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,32,0.4790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,64,0.4837
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,128,0.4824
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,512,0.4855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,256,0.4838
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,1024,0.4819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,2048,0.4964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,4096,0.7329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,8192,0.5523
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,16384,0.6052
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,32768,0.7040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,2,0.7184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,65536,0.8980
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,256,1,1,131072,1.3263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,8,0.7099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,4,0.7116
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,16,0.7121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,32,0.7140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,64,0.7186
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,128,0.7314
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,256,0.7218
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,512,0.7282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,1024,0.7334
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,2048,0.7499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,4096,1.1962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,8192,0.8609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,16384,0.9739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,32768,1.1720
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,512,1,1,65536,1.5434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,2,1.1553
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4,1.1545
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8,1.1561
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16,1.1524
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32,1.1548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,64,1.1590
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,128,1.1574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,256,1.1628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,512,1.1921
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,1024,1.1851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,2048,1.2183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,4096,1.9662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,8192,1.4186
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,16384,1.6409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,2,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,4,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,16,0.2055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,64,1024,1,1,32768,2.0177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,8,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,32,0.2068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,64,0.2074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,128,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,256,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,512,0.2023
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,1024,0.1998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,2048,0.2072
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,4096,0.3143
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,8192,0.2312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,16384,0.2372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,65536,0.2658
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,32768,0.2467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1,1,1,131072,0.3057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,4,0.2858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,8,0.2774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,2,0.2718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,32,0.2824
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,16,0.2896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,64,0.2824
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,128,0.2893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,256,0.2774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,512,0.2893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,1024,0.2791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,2048,0.2718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,4096,0.4171
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,8192,0.3093
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,16384,0.3153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,32768,0.3507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,65536,0.3914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,2,1,1,131072,0.5228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,2,0.2278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,4,0.2207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,8,0.2329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,16,0.2325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,32,0.2312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,64,0.2296
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,128,0.2204
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,256,0.2305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,1024,0.2273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,512,0.2193
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,2048,0.2360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,8192,0.2571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,4096,0.3457
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,16384,0.2572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,32768,0.2592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,65536,0.2978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,4,1,1,131072,0.3368
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,2,0.2322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,4,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,8,0.2387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,16,0.2380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,32,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,64,0.2397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,128,0.2400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,512,0.2307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,256,0.2354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,1024,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,2048,0.2416
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,4096,0.3554
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,8192,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,16384,0.2666
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,32768,0.2796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,65536,0.3021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,8,1,1,131072,0.3664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,2,0.2614
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,4,0.2554
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,8,0.2586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,16,0.2504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,32,0.2582
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,64,0.2557
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,256,0.2567
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,128,0.2497
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,512,0.2609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,1024,0.2585
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,2048,0.2627
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,4096,0.3736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,16384,0.2869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,8192,0.2866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,32768,0.3036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,131072,0.4154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,16,1,1,65536,0.3363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,2,0.2584
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,4,0.2607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,8,0.2515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,16,0.2575
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,32,0.2570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,64,0.2597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,256,0.2609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,128,0.2521
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,512,0.2617
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,1024,0.2596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,2048,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,8192,0.2880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,4096,0.3861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,16384,0.2998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,65536,0.3633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,32768,0.3232
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,32,1,1,131072,0.4597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,2,0.2807
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,4,0.2773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,16,0.2814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,8,0.2806
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,32,0.2830
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,64,0.2778
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,128,0.2854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,256,0.2839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,512,0.2843
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,2048,0.2890
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,1024,0.2788
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,4096,0.4114
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,8192,0.3201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,16384,0.3281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,32768,0.3676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,65536,0.4360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,64,1,1,131072,0.5651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,4,0.3286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,8,0.3278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,2,0.3249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,32,0.3227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,64,0.3306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,16,0.3294
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,128,0.3314
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,256,0.3306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,512,0.3306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,1024,0.3304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,2048,0.3299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,4096,0.4599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,8192,0.3777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,16384,0.4005
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,32768,0.4523
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,65536,0.5623
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,2,0.4411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,4,0.4426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,128,1,1,131072,0.7774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,8,0.4422
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,16,0.4383
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,32,0.4419
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,64,0.4449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,128,0.4430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,256,0.4441
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,512,0.4442
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,1024,0.4504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,2048,0.4593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,4096,0.6852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,8192,0.5173
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,16384,0.5700
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,32768,0.6679
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,2,0.6374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,65536,0.8675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,256,1,1,131072,1.2852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,4,0.6378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,8,0.6361
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,16,0.6395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,32,0.6419
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,64,0.6470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,128,0.6469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,256,0.6568
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,512,0.6513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,1024,0.6614
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,2048,0.6743
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,4096,1.1121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,16384,0.8963
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,8192,0.7870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,32768,1.0896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,512,1,1,65536,1.4828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,2,1.0240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8,1.0271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16,1.0265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4,1.0277
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32,1.0330
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,64,1.0372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,128,1.0372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,256,1.0559
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,512,1.0458
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,4096,1.8389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,2048,1.0917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,1024,1.0615
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,8192,1.2981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,16384,1.5149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,2,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,32,1024,1,1,32768,1.8923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,4,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,8,0.1855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,16,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,32,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,64,0.1895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,128,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,256,0.1891
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,512,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,1024,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,2048,0.1866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,4096,0.3044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,8192,0.2163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,16384,0.2184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,32768,0.2356
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,65536,0.2564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1,1,1,131072,0.2939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,2,0.2529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,4,0.2565
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,8,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,16,0.2698
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,32,0.2749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,64,0.2593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,128,0.2641
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,256,0.2426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,512,0.2412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,1024,0.2619
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,2048,0.2714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,4096,0.4018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,8192,0.2918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,16384,0.3032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,32768,0.3256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,65536,0.3870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,2,1,1,131072,0.4986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,2,0.2259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,4,0.2176
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,8,0.2184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,16,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,32,0.2188
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,64,0.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,128,0.2192
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,256,0.2083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,512,0.2259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,2048,0.2190
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,4096,0.3367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,1024,0.2187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,8192,0.2476
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,16384,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,32768,0.2501
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,65536,0.2933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,4,1,1,131072,0.3361
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,2,0.2280
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,4,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,8,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,16,0.2260
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,32,0.2190
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,64,0.2177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,128,0.2301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,256,0.2258
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,512,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,2048,0.2300
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,1024,0.2251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,4096,0.3418
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,8192,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,16384,0.2520
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,32768,0.2735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,65536,0.3005
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,8,1,1,131072,0.3609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,4,0.2439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,2,0.2489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,8,0.2375
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,16,0.2452
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,32,0.2367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,64,0.2498
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,128,0.2441
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,256,0.2468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,512,0.2447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,1024,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,2048,0.2420
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,4096,0.3720
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,8192,0.2681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,16384,0.2838
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,32768,0.2966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,65536,0.3351
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,16,1,1,131072,0.3998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,2,0.2479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,4,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,8,0.2446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,16,0.2407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,32,0.2440
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,64,0.2491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,128,0.2471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,256,0.2446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,512,0.2411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,1024,0.2485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,2048,0.2542
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,4096,0.3693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,8192,0.2791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,16384,0.2884
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,32768,0.3116
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,65536,0.3554
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,32,1,1,131072,0.4421
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,2,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,4,0.2653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,8,0.2629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,16,0.2685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,32,0.2695
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,64,0.2693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,128,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,256,0.2641
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,512,0.2708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,1024,0.2720
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,4096,0.4024
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,2048,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,8192,0.3071
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,16384,0.3208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,32768,0.3477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,65536,0.4213
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,64,1,1,131072,0.5527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,2,0.3050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,4,0.3044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,8,0.3010
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,16,0.3099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,32,0.3070
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,64,0.3034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,128,0.3071
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,256,0.3102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,512,0.3125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,1024,0.3124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,2048,0.3098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,4096,0.4478
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,8192,0.3570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,32768,0.4267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,65536,0.5408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,2,0.4117
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,4,0.4073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,16384,0.3799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,128,1,1,131072,0.7545
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,8,0.4124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,16,0.4128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,32,0.4125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,64,0.4127
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,128,0.4112
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,256,0.4155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,512,0.4156
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,2048,0.4294
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,1024,0.4196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,4096,0.6578
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,8192,0.4901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,16384,0.5356
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,32768,0.6435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,65536,0.8386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,2,0.5851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,4,0.5849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,256,1,1,131072,1.2595
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,8,0.5891
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,16,0.5834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,32,0.5865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,64,0.5905
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,128,0.5944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,256,0.5953
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,512,0.5994
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,1024,0.6043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,2048,0.6230
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,4096,1.0567
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,8192,0.7275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,16384,0.8389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,32768,1.0396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,2,0.9103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4,0.9101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,512,1,1,65536,1.4231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8,0.9247
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16,0.9081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32,0.9126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,64,0.9189
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,128,0.9200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,256,0.9197
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,512,0.9302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,1024,0.9446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,2048,0.9848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,4096,1.7211
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,8192,1.1740
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,16384,1.3975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,2,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,4,0.1898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,16,1024,1,1,32768,1.7751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,16,0.1871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,8,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,32,0.1797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,256,0.1834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,128,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,64,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,512,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,1024,0.1799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,2048,0.1787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,4096,0.2891
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,8192,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,16384,0.2066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,32768,0.2171
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,65536,0.2451
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1,1,1,131072,0.2814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,4,0.2322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,8,0.2559
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,2,0.2471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,16,0.2401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,32,0.2490
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,64,0.2469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,128,0.2423
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,512,0.2404
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,1024,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,256,0.2314
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,4096,0.2663
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,2048,0.2399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,8192,0.2716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,16384,0.2804
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,32768,0.3006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,65536,0.3718
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,2,1,1,131072,0.4820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,2,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,4,0.2004
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,8,0.2175
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,16,0.2164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,32,0.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,64,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,128,0.2094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,512,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,256,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,1024,0.2031
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,2048,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,4096,0.3248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,8192,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,16384,0.2370
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,32768,0.2560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,131072,0.3262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,2,0.2214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,4,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,4,1,1,65536,0.2732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,8,0.2184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,16,0.2218
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,32,0.2170
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,64,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,512,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,256,0.2216
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,1024,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,2048,0.2208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,128,0.2164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,4096,0.3381
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,8192,0.2425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,16384,0.2503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,32768,0.2608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,65536,0.2992
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,8,1,1,131072,0.3423
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,2,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,4,0.2357
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,8,0.2408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,16,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,32,0.2393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,64,0.2332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,128,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,256,0.2300
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,512,0.2398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,1024,0.2380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,2048,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,4096,0.3673
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,8192,0.2713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,16384,0.2787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,32768,0.2851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,65536,0.3214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,16,1,1,131072,0.3980
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,2,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,4,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,8,0.2398
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,32,0.2408
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,64,0.2356
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,16,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,128,0.2307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,256,0.2395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,512,0.2412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,1024,0.2440
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,2048,0.2466
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,8192,0.2697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,16384,0.2845
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,32768,0.2997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,65536,0.3530
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,131072,0.4425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,32,1,1,4096,0.3715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,2,0.2609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,4,0.2550
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,8,0.2611
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,16,0.2607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,32,0.2550
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,128,0.2590
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,256,0.2645
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,64,0.2607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,512,0.2640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,1024,0.2605
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,2048,0.2699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,4096,0.3961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,16384,0.3178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,32768,0.3456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,65536,0.4153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,8192,0.2955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,64,1,1,131072,0.5456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,2,0.2900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,4,0.2972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,8,0.2942
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,16,0.2953
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,32,0.2961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,64,0.2907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,128,0.2953
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,256,0.2974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,512,0.2960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,1024,0.3045
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,2048,0.3081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,4096,0.4360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,8192,0.3429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,16384,0.3722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,32768,0.4217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,65536,0.5311
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,128,1,1,131072,0.7446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,2,0.3965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,4,0.3960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,8,0.3960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,16,0.3955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,64,0.3952
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,32,0.3966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,128,0.3967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,256,0.3960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,512,0.4002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,1024,0.4051
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,2048,0.4157
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,4096,0.6449
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,16384,0.5252
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,8192,0.4719
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,32768,0.6219
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,65536,0.8166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,256,1,1,131072,1.2421
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,2,0.5589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,8,0.5634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,4,0.5596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,16,0.5596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,32,0.5590
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,64,0.5604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,128,0.5683
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,256,0.5684
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,512,0.5708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,1024,0.5801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,2048,0.5940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,4096,1.0348
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,8192,0.7006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,16384,0.8140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,32768,1.0097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,512,1,1,65536,1.3921
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,2,0.8563
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4,0.8651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8,0.8560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32,0.8571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16,0.8525
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,64,0.8589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,128,0.8629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,256,0.8645
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,512,0.8832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,1024,0.8879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,2048,0.9171
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,4096,1.6663
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,16384,1.3394
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,8192,1.1191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,2,0.1823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,4,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,8,1024,1,1,32768,1.7212
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,8,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,16,0.1840
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,32,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,64,0.1770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,128,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,256,0.1814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,512,0.1744
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,1024,0.1793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,2048,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,4096,0.2883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,8192,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,16384,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,32768,0.2190
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,65536,0.2426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1,1,1,131072,0.2754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,2,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,4,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,8,0.2326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,16,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,32,0.2413
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,64,0.2469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,128,0.2313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,256,0.2494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,512,0.2450
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,1024,0.2426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,2048,0.2367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,4096,0.2534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,8192,0.2642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,16384,0.2755
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,32768,0.2871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,65536,0.3751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,2,1,1,131072,0.4877
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,2,0.2091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,4,0.1958
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,16,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,32,0.2015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,8,0.2153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,64,0.1998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,128,0.2074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,256,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,1024,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,512,0.2102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,2048,0.2060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,4096,0.3210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,8192,0.2373
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,16384,0.2198
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,32768,0.2477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,65536,0.2684
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,4,1,1,131072,0.3276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,2,0.2161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,8,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,4,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,16,0.2199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,32,0.2067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,64,0.2167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,128,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,256,0.2188
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,512,0.2175
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,1024,0.2191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,2048,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,4096,0.3305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,8192,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,16384,0.2481
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,32768,0.2574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,65536,0.2937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,8,1,1,131072,0.3523
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,2,0.2314
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,8,0.2351
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,16,0.2262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,4,0.2353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,32,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,64,0.2264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,128,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,256,0.2393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,512,0.2317
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,1024,0.2264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,2048,0.2378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,4096,0.3639
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,8192,0.2657
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,16384,0.2635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,32768,0.2909
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,65536,0.3312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,16,1,1,131072,0.3937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,2,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,4,0.2361
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,8,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,16,0.2372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,32,0.2368
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,64,0.2366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,128,0.2373
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,256,0.2343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,512,0.2269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,1024,0.2384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,2048,0.2380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,4096,0.3706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,8192,0.2717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,16384,0.2821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,32768,0.3047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,65536,0.3492
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,2,0.2489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,32,1,1,131072,0.4339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,8,0.2582
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,4,0.2564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,16,0.2538
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,64,0.2574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,32,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,128,0.2490
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,256,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,512,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,1024,0.2637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,2048,0.2644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,4096,0.3906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,8192,0.3015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,16384,0.3053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,32768,0.3456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,65536,0.4111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,64,1,1,131072,0.5389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,2,0.2898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,4,0.2881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,8,0.2890
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,16,0.2921
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,32,0.2835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,64,0.2856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,128,0.2916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,256,0.2863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,512,0.2986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,2048,0.3023
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,1024,0.2989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,4096,0.4332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,8192,0.3372
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,16384,0.3633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,32768,0.4138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,65536,0.5295
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,2,0.3872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,4,0.3881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,8,0.3886
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,128,1,1,131072,0.7510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,16,0.3869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,32,0.3850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,64,0.3879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,128,0.3919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,256,0.3930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,512,0.3923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,1024,0.3980
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,2048,0.4052
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,4096,0.6323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,16384,0.5189
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,8192,0.4620
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,32768,0.6184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,65536,0.8126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,2,0.5458
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,256,1,1,131072,1.2376
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,4,0.5463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,8,0.5427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,16,0.5499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,32,0.5453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,64,0.5495
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,128,0.5535
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,256,0.5551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,512,0.5576
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,2048,0.5803
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,1024,0.5630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,4096,1.0244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,8192,0.6873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,16384,0.8036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,32768,0.9955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,512,1,1,65536,1.3863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,2,0.8273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,4,0.8246
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,8,0.8283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,16,0.8386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,32,0.8269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,64,0.8321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,128,0.8342
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,256,0.8351
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,512,0.8464
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,2048,0.9002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,1024,0.8559
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,4096,1.6377
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,16384,1.3151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,2,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,8192,1.0886
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,4,1024,1,1,32768,1.6951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,4,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,8,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,16,0.1694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,32,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,64,0.1810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,128,0.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,256,0.1814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,512,0.1779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,1024,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,2048,0.1712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,4096,0.2836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,8192,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,16384,0.2070
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,32768,0.2154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,65536,0.2407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1,1,1,131072,0.2775
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,2,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,8,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,16,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,4,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,32,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,64,0.2356
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,128,0.2418
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,256,0.2371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,512,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,1024,0.2231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,2048,0.2325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,4096,0.2468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,8192,0.2611
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,16384,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,32768,0.2994
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,65536,0.3736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,2,1,1,131072,0.4947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,2,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,4,0.1939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,8,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,16,0.2061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,32,0.2020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,64,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,256,0.2061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,512,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,128,0.2031
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,1024,0.1951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,2048,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,4096,0.3248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,8192,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,16384,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,32768,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,65536,0.2741
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,4,1,1,131072,0.3121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,2,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,4,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,8,0.2184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,16,0.2152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,32,0.2094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,64,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,128,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,256,0.2127
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,512,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,1024,0.2134
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,2048,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,4096,0.3302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,8192,0.2332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,16384,0.2358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,32768,0.2610
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,65536,0.2934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,8,1,1,131072,0.3510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,2,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,4,0.2338
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,8,0.2333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,16,0.2238
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,32,0.2212
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,64,0.2343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,128,0.2333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,256,0.2366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,512,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,1024,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,2048,0.2387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,4096,0.3499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,8192,0.2522
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,16384,0.2699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,32768,0.2913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,65536,0.3255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,16,1,1,131072,0.3905
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,2,0.2347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,4,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,8,0.2256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,16,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,32,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,64,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,256,0.2287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,512,0.2353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,128,0.2331
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,2048,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,1024,0.2363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,8192,0.2699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,4096,0.3511
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,16384,0.2766
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,32768,0.3014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,65536,0.3477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,32,1,1,131072,0.4396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,2,0.2446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,4,0.2548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,8,0.2542
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,16,0.2464
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,32,0.2511
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,64,0.2529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,128,0.2548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,256,0.2483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,512,0.2589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,1024,0.2614
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,2048,0.2635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,4096,0.3820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,8192,0.2956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,32768,0.3365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,65536,0.4102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,16384,0.3094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,64,1,1,131072,0.5407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,2,0.2861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,4,0.2880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,8,0.2798
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,16,0.2795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,32,0.2835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,64,0.2875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,128,0.2874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,512,0.2937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,256,0.2878
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,2048,0.2941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,1024,0.2977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,4096,0.4250
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,16384,0.3648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,8192,0.3401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,32768,0.4164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,65536,0.5256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,128,1,1,131072,0.7486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,2,0.3820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,4,0.3827
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,8,0.3791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,16,0.3834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,32,0.3829
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,64,0.3839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,128,0.3857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,256,0.3876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,512,0.3857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,1024,0.3925
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,2048,0.4022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,4096,0.6344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,8192,0.4613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,16384,0.5133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,32768,0.6137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,65536,0.8113
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,2,0.5369
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,4,0.5385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,256,1,1,131072,1.2333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,8,0.5373
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,32,0.5414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,16,0.5399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,64,0.5434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,128,0.5444
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,256,0.5479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,512,0.5552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,1024,0.5588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,2048,0.5704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,4096,1.0114
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,8192,0.6827
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,16384,0.7960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,32768,0.9877
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,2,0.8219
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,512,1,1,65536,1.3712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,4,0.8100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,8,0.8119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,16,0.8145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,32,0.8158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,64,0.8185
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,128,0.8165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,256,0.8332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,512,0.8307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,2048,0.8729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,1024,0.8434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,4096,1.6277
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,16384,1.3002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,8192,1.0759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,2,1024,1,1,32768,1.6791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,2,0.1693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,4,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,8,0.1800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,16,0.1737
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,32,0.1772
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,64,0.1676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,128,0.1775
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,256,0.1690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,512,0.1806
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,1024,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,2048,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,4096,0.2878
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,8192,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,16384,0.1922
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,32768,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,65536,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1,1,1,131072,0.2765
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,8,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,16,0.2305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,32,0.2412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,64,0.2334
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,128,0.2277
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,4,0.2437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,2,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,256,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,512,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,1024,0.2386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,4096,0.2469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,8192,0.2623
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,2048,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,16384,0.2638
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,32768,0.2937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,65536,0.3587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,2,1,1,131072,0.4588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,2,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,4,0.1972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,8,0.1866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,16,0.2035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,32,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,64,0.2038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,128,0.1920
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,256,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,512,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,1024,0.2049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,2048,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,4096,0.3115
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,8192,0.2237
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,16384,0.2175
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,32768,0.2419
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,65536,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,4,1,1,131072,0.3179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,2,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,8,0.2095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,4,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,16,0.2090
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,32,0.2116
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,64,0.2012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,128,0.2084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,256,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,512,0.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,1024,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,2048,0.2096
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,4096,0.3244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,8192,0.2285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,16384,0.2386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,32768,0.2529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,65536,0.2907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,8,1,1,131072,0.3329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,2,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,4,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,8,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,16,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,32,0.2310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,64,0.2265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,128,0.2322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,256,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,512,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,1024,0.2305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,2048,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,4096,0.3613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,8192,0.2616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,16384,0.2640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,32768,0.2881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,65536,0.3138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,16,1,1,131072,0.3840
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,2,0.2293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,4,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,8,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,16,0.2284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,32,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,64,0.2326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,256,0.2222
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,512,0.2291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,128,0.2240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,1024,0.2331
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,2048,0.2382
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,4096,0.3616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,8192,0.2686
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,16384,0.2778
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,32768,0.2918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,65536,0.3394
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,32,1,1,131072,0.4322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,2,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,4,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,8,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,16,0.2532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,32,0.2525
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,64,0.2439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,128,0.2439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,256,0.2510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,512,0.2535
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,1024,0.2585
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,2048,0.2588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,4096,0.3880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,8192,0.2972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,16384,0.2989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,32768,0.3326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,65536,0.4070
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,64,1,1,131072,0.5391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,2,0.2848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,4,0.2808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,16,0.2755
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,8,0.2850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,32,0.2777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,64,0.2848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,128,0.2837
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,256,0.2891
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,512,0.2890
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,1024,0.2917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,2048,0.2913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,4096,0.4225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,8192,0.3402
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,16384,0.3636
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,32768,0.4133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,65536,0.5248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,128,1,1,131072,0.7379
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,2,0.3785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,4,0.3772
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,8,0.3801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,16,0.3827
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,32,0.3832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,64,0.3796
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,128,0.3848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,256,0.3868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,512,0.3874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,1024,0.3885
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,2048,0.3998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,4096,0.6328
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,8192,0.4554
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,16384,0.5125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,32768,0.6105
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,65536,0.8053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,2,0.5374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,4,0.5330
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,8,0.5354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,256,1,1,131072,1.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,16,0.5309
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,32,0.5350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,64,0.5380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,128,0.5423
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,256,0.5474
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,512,0.5486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,2048,0.5689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,1024,0.5515
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,4096,1.0012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,8192,0.6798
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,16384,0.7893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,32768,0.9894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,512,1,1,65536,1.3700
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,2,0.8066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,4,0.8036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,8,0.8057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,16,0.8084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,32,0.8058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,64,0.8113
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,128,0.8123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,256,0.8266
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,512,0.8249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,1024,0.8371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,2048,0.8629
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,4096,1.6247
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,8192,1.0681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,16384,1.2934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,2,0.2276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,fp8_block,1,1024,1,1,32768,1.6793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,4,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,8,0.2326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,16,0.2287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,32,0.2363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,64,0.2329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,128,0.2255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,256,0.2333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,512,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,1024,0.2366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,2048,0.2353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,4096,0.2478
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,8192,0.2612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,16384,0.2681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,32768,0.2870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,65536,0.3591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,2,0.2402
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1,1,1,131072,0.4704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,4,0.2472
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,8,0.2383
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,16,0.2447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,32,0.2454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,64,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,128,0.2471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,256,0.2389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,512,0.2462
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,1024,0.2461
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,2048,0.2499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,4096,0.3762
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,8192,0.2735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,16384,0.2747
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,32768,0.3016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,65536,0.3709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,2,1,1,131072,0.4742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,2,0.2527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,4,0.2485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,8,0.2489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,16,0.2421
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,32,0.2514
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,64,0.2421
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,128,0.2496
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,256,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,512,0.2521
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,1024,0.2506
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,2048,0.2535
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,4096,0.2626
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,8192,0.2777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,16384,0.2784
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,32768,0.3133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,131072,0.4887
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,2,0.2571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,4,1,1,65536,0.3826
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,4,0.2572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,8,0.2523
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,16,0.2553
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,32,0.2525
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,64,0.2565
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,128,0.2599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,256,0.2570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,512,0.2590
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,1024,0.2544
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,2048,0.2596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,4096,0.3930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,8192,0.2816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,32768,0.3272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,131072,0.5135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,16384,0.2988
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,8,1,1,65536,0.3956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,2,0.2727
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,8,0.2756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,4,0.2748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,16,0.2769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,32,0.2775
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,128,0.2754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,256,0.2759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,64,0.2771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,512,0.2719
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,2048,0.2816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,1024,0.2778
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,4096,0.4149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,8192,0.3099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,32768,0.3487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,16384,0.3192
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,65536,0.4237
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,16,1,1,131072,0.5529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,4,0.2793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,2,0.2745
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,8,0.2814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,32,0.2794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,64,0.2800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,128,0.2804
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,16,0.2780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,256,0.2757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,1024,0.2838
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,512,0.2760
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,2048,0.2913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,4096,0.4201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,16384,0.3303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,8192,0.3112
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,32768,0.3667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,65536,0.4529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,2,0.2965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,32,1,1,131072,0.6054
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,4,0.3028
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,8,0.3119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,16,0.2966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,32,0.3003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,64,0.2992
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,128,0.3012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,256,0.2965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,512,0.3025
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,1024,0.3083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,2048,0.3087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,4096,0.4475
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,8192,0.3440
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,16384,0.3656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,32768,0.4130
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,65536,0.5127
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,64,1,1,131072,0.7123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,2,0.3702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,4,0.3713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,8,0.3679
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,16,0.3794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,32,0.3671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,64,0.3639
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,128,0.3670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,256,0.3708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,512,0.3747
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,1024,0.3774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,2048,0.3932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,4096,0.5332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,8192,0.4339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,16384,0.4622
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,32768,0.5337
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,65536,0.6791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,128,1,1,131072,0.9601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,2,0.5085
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,4,0.5021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,8,0.5099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,16,0.4992
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,32,0.5008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,64,0.5021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,128,0.5067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,256,0.5118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,512,0.5272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,1024,0.5373
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,2048,0.5607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,4096,0.8138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,8192,0.6224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,16384,0.6790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,32768,0.7983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,65536,1.0310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,2,0.7785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,256,1,1,131072,1.5929
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,4,0.8800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,8,0.7787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,16,0.7779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,32,0.7785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,64,0.7821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,128,0.7864
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,256,0.8502
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,512,0.8088
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,1024,0.8397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,2048,0.8832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,8192,0.9953
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,4096,1.2692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,16384,1.1227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,32768,1.3585
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,2,1.3464
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,4,1.4201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,512,1,1,65536,1.8257
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,8,1.3495
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,16,1.3550
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,32,1.3600
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,128,1.3724
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,64,1.3668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,256,1.3851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,512,1.4314
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,1024,1.4526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,2048,1.5466
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,4096,2.0975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,16384,2.0132
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,2,0.2042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,4,0.2107
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,8192,1.7648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,128,1024,1,1,32768,2.4688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,8,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,16,0.2035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,32,0.2082
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,64,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,128,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,256,0.2091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,512,0.2128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,1024,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,2048,0.2069
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,4096,0.2216
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,8192,0.2378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,16384,0.2428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,65536,0.3347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,32768,0.2741
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,2,0.2200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1,1,1,131072,0.4332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,4,0.2229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,16,0.2234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,8,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,32,0.2237
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,64,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,128,0.2199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,256,0.2207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,512,0.2170
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,1024,0.2239
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,2048,0.2275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,4096,0.3570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,8192,0.2396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,16384,0.2541
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,131072,0.4571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,65536,0.3433
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,2,1,1,32768,0.2750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,2,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,4,0.2262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,8,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,16,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,128,0.2257
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,32,0.2244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,256,0.2196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,64,0.2265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,512,0.2285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,1024,0.2288
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,2048,0.2236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,4096,0.3589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,8192,0.2548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,32768,0.2897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,65536,0.3556
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,16384,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,4,1,1,131072,0.4690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,2,0.2335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,4,0.2295
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,8,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,16,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,32,0.2299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,64,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,128,0.2281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,256,0.2344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,512,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,1024,0.2290
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,2048,0.2358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,4096,0.3665
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,16384,0.2723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,32768,0.2939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,8192,0.2624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,65536,0.3697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,8,1,1,131072,0.4913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,2,0.2477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,4,0.2524
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,16,0.2497
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,8,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,64,0.2488
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,32,0.2467
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,128,0.2506
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,256,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,512,0.2501
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,1024,0.2541
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,2048,0.2587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,4096,0.3884
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,8192,0.2812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,16384,0.2902
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,32768,0.3261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,65536,0.4020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,16,1,1,131072,0.5294
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,2,0.2513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,4,0.2497
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,8,0.2510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,32,0.2484
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,64,0.2529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,16,0.2464
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,128,0.2531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,256,0.2482
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,512,0.2519
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,1024,0.2549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,2048,0.2611
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,4096,0.3895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,8192,0.2872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,16384,0.2979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,32768,0.3414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,65536,0.4193
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,32,1,1,131072,0.5782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,2,0.2661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,4,0.2666
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,8,0.2650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,16,0.2699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,32,0.2620
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,64,0.2619
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,128,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,256,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,512,0.2765
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,2048,0.2775
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,1024,0.2714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,8192,0.3068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,4096,0.4129
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,16384,0.3273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,32768,0.3797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,65536,0.4834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,64,1,1,131072,0.6811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,2,0.3193
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,8,0.3190
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,4,0.3160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,16,0.3352
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,32,0.3169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,128,0.3201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,64,0.3211
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,256,0.3232
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,512,0.3249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,1024,0.3285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,2048,0.3412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,4096,0.4852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,8192,0.3872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,32768,0.4846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,16384,0.4187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,65536,0.6321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,128,1,1,131072,0.9102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,2,0.4196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,4,0.4209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,8,0.4494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,16,0.4230
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,32,0.4225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,64,0.4217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,128,0.4251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,256,0.4291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,512,0.4333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,1024,0.4494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,2048,0.4760
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,4096,0.7315
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,16384,0.5943
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,8192,0.5385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,32768,0.7132
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,65536,0.9487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,256,1,1,131072,1.5084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,2,0.6279
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,4,0.7060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,8,0.6294
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,32,0.6373
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,16,0.6319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,64,0.6422
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,128,0.6441
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,256,0.6729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,512,0.6683
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,1024,0.7022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,2048,0.7430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,4096,1.1306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,8192,0.8560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,16384,0.9812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,32768,1.2148
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,512,1,1,65536,1.6713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,2,1.0797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,8,1.1616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,4,1.0862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,16,1.0914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,32,1.0960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,64,1.1008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,128,1.1023
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,256,1.1187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,512,1.1426
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,2048,1.2898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,1024,1.2154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,4096,1.8410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,8192,1.5106
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,16384,1.7558
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,4,0.1816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,64,1024,1,1,32768,2.2012
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,2,0.1866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,8,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,16,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,32,0.1862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,64,0.1895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,128,0.1895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,256,0.1816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,512,0.1890
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,1024,0.1806
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,2048,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,8192,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,16384,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,4096,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,32768,0.2504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,65536,0.3004
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,2,0.1883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1,1,1,131072,0.4175
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,4,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,8,0.1953
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,16,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,32,0.1961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,128,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,64,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,256,0.1982
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,512,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,1024,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,2048,0.1981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,4096,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,8192,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,16384,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,32768,0.2572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,65536,0.3260
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,2,0.2035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,2,1,1,131072,0.4294
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,8,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,4,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,16,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,32,0.1897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,64,0.2004
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,128,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,256,0.1918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,512,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,1024,0.2053
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,2048,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,8192,0.2174
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,4096,0.3327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,16384,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,32768,0.2693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,65536,0.3275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,4,1,1,131072,0.4502
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,2,0.2096
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,4,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,8,0.2010
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,16,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,32,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,64,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,128,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,256,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,512,0.2108
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,1024,0.2143
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,2048,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,4096,0.3500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,8192,0.2440
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,16384,0.2575
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,32768,0.2836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,65536,0.3562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,8,1,1,131072,0.4776
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,2,0.2271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,4,0.2229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,8,0.2296
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,32,0.2281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,16,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,64,0.2243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,128,0.2292
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,256,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,512,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,2048,0.2386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,4096,0.3769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,8192,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,1024,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,16384,0.2764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,32768,0.3103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,65536,0.3871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,4,0.2295
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,16,1,1,131072,0.5135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,8,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,16,0.2271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,32,0.2233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,2,0.2271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,64,0.2285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,128,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,256,0.2305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,2048,0.2400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,512,0.2275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,4096,0.3725
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,8192,0.2702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,1024,0.2367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,16384,0.2849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,32768,0.3240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,65536,0.4030
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,32,1,1,131072,0.5608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,2,0.2348
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,4,0.2394
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,8,0.2421
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,16,0.2517
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,32,0.2395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,64,0.2399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,128,0.2379
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,256,0.2435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,512,0.2425
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,1024,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,2048,0.2559
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,4096,0.3894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,8192,0.2879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,16384,0.3048
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,32768,0.3578
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,65536,0.4610
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,64,1,1,131072,0.6526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,2,0.2844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,4,0.2935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,8,0.2874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,16,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,32,0.2875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,64,0.2834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,128,0.2867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,256,0.2880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,512,0.2926
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,1024,0.3019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,2048,0.3122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,4096,0.4531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,16384,0.3844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,8192,0.3503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,32768,0.4513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,65536,0.5947
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,128,1,1,131072,0.8750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,2,0.3822
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,4,0.3723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,8,0.4049
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,16,0.3706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,32,0.3698
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,64,0.3706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,128,0.3706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,256,0.3852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,512,0.3999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,1024,0.3970
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,2048,0.4224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,4096,0.6748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,8192,0.4849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,16384,0.5386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,32768,0.6613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,2,0.5203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,4,0.6088
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,65536,0.8972
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,256,1,1,131072,1.4551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,8,0.5196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,16,0.5220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,32,0.5208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,64,0.5255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,256,0.5388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,128,0.5306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,512,0.5675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,2048,0.6228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,1024,0.5734
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,4096,1.0109
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,8192,0.7365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,16384,0.8625
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,32768,1.0985
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,2,1.0281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,8,0.8649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,16,0.8723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,512,1,1,65536,1.5622
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,4,0.8617
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,32,0.8780
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,64,0.8862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,128,0.8870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,256,0.9362
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,512,0.9293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,2048,1.0865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,1024,0.9769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,4096,1.6263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,8192,1.2910
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,16384,1.5350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,32,1024,1,1,32768,1.9819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,2,0.1709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,4,0.1793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,8,0.1777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,16,0.1802
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,32,0.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,64,0.1691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,128,0.1839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,256,0.1713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,512,0.1828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,1024,0.1810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,2048,0.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,4096,0.1959
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,8192,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,16384,0.2004
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,32768,0.2281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,65536,0.3044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,2,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1,1,1,131072,0.4087
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,4,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,8,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,16,0.1872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,32,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,64,0.1814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,128,0.1897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,256,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,512,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,2048,0.1965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,4096,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,1024,0.1921
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,8192,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,16384,0.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,32768,0.2486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,65536,0.3101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,2,1,1,131072,0.4269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,2,0.1930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,4,0.1936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,8,0.1837
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,16,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,32,0.1856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,64,0.1936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,128,0.1918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,256,0.1994
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,1024,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,2048,0.1856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,512,0.1936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,4096,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,8192,0.2073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,16384,0.2264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,32768,0.2524
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,65536,0.3256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,4,1,1,131072,0.4422
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,2,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,4,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,8,0.1963
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,16,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,32,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,64,0.2036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,128,0.2038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,256,0.2018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,512,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,2048,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,1024,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,4096,0.2198
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,8192,0.2210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,16384,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,32768,0.2760
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,65536,0.3474
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,8,1,1,131072,0.4616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,2,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,4,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,16,0.2143
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,8,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,32,0.2185
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,64,0.2226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,128,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,256,0.2146
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,512,0.2164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,1024,0.2151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,2048,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,4096,0.3624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,8192,0.2571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,16384,0.2733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,32768,0.3037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,65536,0.3732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,16,1,1,131072,0.5097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,2,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,4,0.2163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,8,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,16,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,32,0.2191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,64,0.2152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,128,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,256,0.2166
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,512,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,1024,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,2048,0.2364
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,4096,0.3685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,8192,0.2642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,16384,0.2756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,32768,0.3123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,65536,0.3965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,32,1,1,131072,0.5465
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,2,0.2275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,4,0.2248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,8,0.2260
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,16,0.2252
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,64,0.2208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,32,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,128,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,256,0.2228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,512,0.2381
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,2048,0.2454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,1024,0.2410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,4096,0.3800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,8192,0.2730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,16384,0.2993
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,32768,0.3435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,65536,0.4450
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,2,0.2676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,64,1,1,131072,0.6432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,4,0.2670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,8,0.2689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,16,0.2624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,32,0.2663
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,64,0.2631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,128,0.2696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,256,0.2742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,512,0.2803
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,1024,0.2905
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,2048,0.2975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,4096,0.4310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,16384,0.3664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,8192,0.3336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,32768,0.4362
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,65536,0.5806
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,2,0.3469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,128,1,1,131072,0.8594
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,8,0.3571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,16,0.3417
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,4,0.3429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,32,0.3432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,64,0.3570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,128,0.3488
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,256,0.3743
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,1024,0.3721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,2048,0.3960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,512,0.3564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,8192,0.4571
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,4096,0.6479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,16384,0.5155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,32768,0.6319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,65536,0.8673
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,2,0.5430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,4,0.5139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,256,1,1,131072,1.4319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,16,0.4721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,8,0.4711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,32,0.4714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,64,0.4754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,128,0.5494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,256,0.4865
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,512,0.4991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,2048,0.5702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,4096,1.0654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,1024,0.5330
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,8192,0.6868
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,16384,0.8123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,32768,1.0461
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,2,0.7565
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,512,1,1,65536,1.5086
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,4,0.7564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,8,0.7572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,16,0.8375
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,32,0.7648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,64,0.7717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,128,0.8983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,256,0.7880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,512,0.8676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,2048,0.9631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,1024,0.8613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,4096,1.5193
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,8192,1.1790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,16384,1.4229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,4,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,16,1024,1,1,32768,1.8767
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,2,0.1654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,8,0.1719
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,16,0.1664
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,32,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,64,0.1809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,128,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,256,0.1709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,512,0.1659
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,1024,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,2048,0.1708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,4096,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,8192,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,16384,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,32768,0.2295
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,65536,0.2945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1,1,1,131072,0.3923
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,2,0.1883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,4,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,8,0.1784
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,16,0.1892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,32,0.1884
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,64,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,256,0.1772
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,128,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,512,0.1912
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,1024,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,2048,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,4096,0.3170
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,8192,0.2093
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,16384,0.2187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,32768,0.2360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,65536,0.3036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,2,1,1,131072,0.4217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,2,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,8,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,4,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,16,0.1939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,32,0.1892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,64,0.1795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,128,0.1895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,256,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,512,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,1024,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,2048,0.1885
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,4096,0.3262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,16384,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,8192,0.2095
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,32768,0.2478
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,65536,0.3186
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,4,1,1,131072,0.4285
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,2,0.1974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,4,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,8,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,16,0.1988
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,32,0.1983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,64,0.2006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,128,0.2003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,256,0.1917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,512,0.1919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,1024,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,2048,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,8192,0.2226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,16384,0.2401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,4096,0.3307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,32768,0.2591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,65536,0.3432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,8,1,1,131072,0.4559
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,2,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,4,0.2155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,16,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,8,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,32,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,64,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,128,0.2103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,256,0.2178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,512,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,1024,0.2168
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,2048,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,4096,0.3549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,8192,0.2495
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,16384,0.2614
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,32768,0.2932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,65536,0.3729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,16,1,1,131072,0.5075
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,2,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,4,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,16,0.2161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,128,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,8,0.2127
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,256,0.2156
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,64,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,32,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,512,0.2137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,1024,0.2187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,2048,0.2291
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,8192,0.2586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,4096,0.3644
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,16384,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,32768,0.3067
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,65536,0.3966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,32,1,1,131072,0.5484
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,2,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,8,0.2142
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,16,0.2292
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,4,0.2175
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,32,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,64,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,256,0.2185
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,128,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,512,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,1024,0.2308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,2048,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,4096,0.3728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,8192,0.2695
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,32768,0.3395
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,16384,0.2932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,65536,0.4458
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,64,1,1,131072,0.6379
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,2,0.2514
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,4,0.2733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,8,0.2570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,32,0.2583
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,16,0.2542
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,64,0.2700
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,128,0.2585
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,256,0.2635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,512,0.2674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,1024,0.2791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,2048,0.2879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,4096,0.4272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,8192,0.3258
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,16384,0.3601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,32768,0.4278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,65536,0.5708
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,128,1,1,131072,0.8503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,2,0.3643
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,4,0.3312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,8,0.3295
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,16,0.3266
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,32,0.3534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,64,0.3303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,128,0.3326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,256,0.3419
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,512,0.3476
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,1024,0.3592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,2048,0.3822
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,4096,0.6355
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,8192,0.4422
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,16384,0.5013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,32768,0.6192
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,65536,0.8553
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,256,1,1,131072,1.4152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,2,0.5127
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,4,0.4473
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,8,0.4477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,16,0.4475
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,32,0.4475
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,64,0.4489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,128,0.4563
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,256,0.4637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,512,0.5120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,1024,0.4983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,4096,1.0396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,16384,0.7846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,8192,0.6608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,2048,0.5507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,32768,1.0244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,512,1,1,65536,1.4850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,2,0.7064
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,8,0.7083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,4,0.7042
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,16,0.7089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,32,0.7151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,64,0.7229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,128,0.8233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,256,0.8232
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,512,0.7634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,2048,0.9146
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,1024,0.8089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,4096,1.4661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,8192,1.1264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,16384,1.3762
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,4,0.1650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,8,0.1648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,2,0.1716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,8,1024,1,1,32768,1.8277
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,16,0.1709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,32,0.1753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,64,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,128,0.1753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,256,0.1651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,512,0.1649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,1024,0.1731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,2048,0.1774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,4096,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,8192,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,32768,0.2299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,16384,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,65536,0.2838
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1,1,1,131072,0.3915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,2,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,4,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,8,0.1823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,16,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,32,0.1866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,64,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,128,0.1752
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,256,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,512,0.1816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,1024,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,2048,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,4096,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,8192,0.2038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,32768,0.2310
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,16384,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,65536,0.2941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,2,1,1,131072,0.4112
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,2,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,8,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,16,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,4,0.1869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,32,0.1798
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,64,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,128,0.1897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,256,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,512,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,1024,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,2048,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,8192,0.1993
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,4096,0.3161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,16384,0.2093
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,32768,0.2472
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,65536,0.3135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,4,1,1,131072,0.4364
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,2,0.1986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,8,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,4,0.1912
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,16,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,32,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,128,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,64,0.1979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,256,0.2005
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,512,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,2048,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,1024,0.2002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,4096,0.3245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,8192,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,16384,0.2324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,32768,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,65536,0.3382
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,8,1,1,131072,0.4657
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,2,0.2075
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,4,0.2158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,8,0.2118
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,32,0.2161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,16,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,64,0.2145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,128,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,256,0.2081
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,512,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,1024,0.2158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,2048,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,4096,0.3545
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,8192,0.2403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,16384,0.2613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,32768,0.3003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,65536,0.3640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,16,1,1,131072,0.5070
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,2,0.2088
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,4,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,8,0.2048
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,16,0.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,32,0.2107
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,64,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,128,0.2043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,256,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,512,0.2116
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,1024,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,2048,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,4096,0.3606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,8192,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,16384,0.2721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,32768,0.3018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,131072,0.5456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,32,1,1,65536,0.3909
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,4,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,2,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,8,0.2293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,16,0.2191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,32,0.2167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,64,0.2178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,128,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,256,0.2171
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,512,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,1024,0.2301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,2048,0.2391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,4096,0.3682
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,8192,0.2706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,16384,0.2917
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,32768,0.3384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,65536,0.4420
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,2,0.2631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,4,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,8,0.2699
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,64,1,1,131072,0.6344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,16,0.2517
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,32,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,64,0.2532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,128,0.2557
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,256,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,512,0.2665
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,1024,0.2739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,4096,0.4239
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,2048,0.2815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,8192,0.3270
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,16384,0.3533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,32768,0.4240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,65536,0.5680
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,128,1,1,131072,0.8549
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,2,0.3555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,4,0.3210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,8,0.3211
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,16,0.3251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,32,0.3492
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,64,0.3231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,128,0.3368
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,256,0.3526
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,512,0.3420
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,1024,0.3503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,2048,0.3759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,4096,0.6304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,8192,0.4352
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,16384,0.4942
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,32768,0.6128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,2,0.4346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,65536,0.8535
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,4,0.4339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,256,1,1,131072,1.4121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,8,0.4339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,16,0.5073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,64,0.4400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,32,0.4356
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,256,0.4518
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,128,0.4421
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,512,0.4632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,1024,0.5043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,2048,0.5365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,4096,0.9197
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,16384,0.7742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,8192,0.6478
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,32768,1.0123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,2,0.6825
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,4,0.6824
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,512,1,1,65536,1.4726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,16,0.8273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,8,0.6837
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,32,0.6927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,64,0.6988
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,128,0.7011
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,512,0.7420
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,256,0.7994
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,1024,0.7840
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,2048,0.8946
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,16384,1.3401
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,4096,1.4322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,8192,1.1111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,2,0.1645
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,4,0.1702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,8,0.1633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,4,1024,1,1,32768,1.8006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,16,0.1689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,32,0.1712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,64,0.1732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,128,0.1731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,256,0.1642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,512,0.1705
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,1024,0.1655
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,2048,0.1695
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,4096,0.1872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,8192,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,16384,0.2038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,32768,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,65536,0.2817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1,1,1,131072,0.3941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,2,0.1671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,4,0.1801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,16,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,32,0.1862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,8,0.1855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,64,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,128,0.1669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,512,0.1678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,256,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,1024,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,2048,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,4096,0.1998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,8192,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,16384,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,32768,0.2252
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,65536,0.2857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,2,1,1,131072,0.4085
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,2,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,4,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,8,0.1875
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,16,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,32,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,128,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,256,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,64,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,512,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,1024,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,2048,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,8192,0.2050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,65536,0.3079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,4096,0.3079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,16384,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,32768,0.2437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,4,1,1,131072,0.4249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,4,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,2,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,8,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,32,0.1911
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,16,0.1910
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,64,0.1977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,128,0.1979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,256,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,512,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,1024,0.1992
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,2048,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,4096,0.3205
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,8192,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,16384,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,32768,0.2625
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,65536,0.3354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,8,1,1,131072,0.4661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,2,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,4,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,8,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,16,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,32,0.2126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,128,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,64,0.2143
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,256,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,512,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,1024,0.2085
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,2048,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,4096,0.3430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,8192,0.2470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,16384,0.2570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,32768,0.2960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,65536,0.3704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,16,1,1,131072,0.4983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,4,0.2083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,2,0.2110
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,8,0.2034
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,16,0.2091
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,32,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,128,0.2083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,64,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,256,0.2019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,512,0.2088
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,1024,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,2048,0.2205
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,4096,0.3564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,8192,0.2531
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,16384,0.2631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,32768,0.3060
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,65536,0.3893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,32,1,1,131072,0.5428
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,2,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,4,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,8,0.2262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,16,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,32,0.2168
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,64,0.2131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,128,0.2149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,256,0.2146
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,512,0.2137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,1024,0.2271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,2048,0.2363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,4096,0.3682
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,8192,0.2626
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,16384,0.2896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,32768,0.3365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,65536,0.4403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,64,1,1,131072,0.6327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,2,0.2485
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,4,0.2662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,8,0.2487
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,16,0.2574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,32,0.2480
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,64,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,128,0.2517
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,256,0.2504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,1024,0.2735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,512,0.2647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,2048,0.2835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,4096,0.4177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,8192,0.3254
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,16384,0.3560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,32768,0.4231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,65536,0.5654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,2,0.3169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,8,0.3441
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,4,0.3179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,128,1,1,131072,0.8479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,16,0.3199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,32,0.3209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,64,0.3191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,128,0.3225
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,256,0.3335
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,512,0.3366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,2048,0.3735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,1024,0.3496
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,4096,0.6278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,8192,0.4349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,16384,0.4907
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,32768,0.6084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,65536,0.8411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,256,1,1,131072,1.4066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,4,0.4277
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,2,0.4280
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,8,0.4302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,16,0.4293
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,32,0.4307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,64,0.4303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,128,0.4333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,256,0.4463
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,512,0.4581
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,2048,0.5292
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,1024,0.4815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,4096,0.9173
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,16384,0.7642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,8192,0.6458
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,32768,1.0048
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,512,1,1,65536,1.4637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,2,0.6727
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,4,0.6715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,8,0.8350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,16,0.6757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,32,0.6781
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,64,0.6889
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,128,0.6869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,256,0.7068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,512,0.7484
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,2048,0.8839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,1024,0.7739
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,4096,1.4324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,8192,1.0906
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,2,0.1633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,16384,1.3405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,4,0.1687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,2,1024,1,1,32768,1.7861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,8,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,16,0.1703
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,32,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,64,0.1628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,128,0.1609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,256,0.1709
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,512,0.1688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,1024,0.1668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,4096,0.1850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,2048,0.1712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,8192,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,32768,0.2207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,16384,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,65536,0.2899
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1,1,1,131072,0.3964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,2,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,4,0.1788
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,8,0.1827
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,16,0.1715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,32,0.1703
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,64,0.1814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,128,0.1799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,256,0.1811
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,512,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,1024,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,2048,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,4096,0.1855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,8192,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,16384,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,32768,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,65536,0.2982
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,2,1,1,131072,0.4044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,2,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,4,0.1760
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,8,0.1730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,16,0.1855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,32,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,64,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,128,0.1816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,256,0.1762
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,512,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,1024,0.1839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,2048,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,4096,0.3123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,8192,0.2066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,16384,0.2158
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,32768,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,65536,0.2991
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,4,1,1,131072,0.4233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,2,0.1929
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,4,0.1846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,16,0.1961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,32,0.1945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,8,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,64,0.1940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,128,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,256,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,512,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,1024,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,2048,0.1967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,8192,0.2146
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,4096,0.3267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,16384,0.2220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,32768,0.2543
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,65536,0.3344
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,8,1,1,131072,0.4596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,2,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,4,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,16,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,32,0.2120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,8,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,64,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,128,0.2098
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,256,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,512,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,1024,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,4096,0.3460
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,2048,0.2161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,8192,0.2412
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,16384,0.2508
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,32768,0.2930
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,65536,0.3687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,16,1,1,131072,0.4971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,2,0.2102
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,4,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,8,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,16,0.2031
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,32,0.2083
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,64,0.2058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,128,0.2070
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,256,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,512,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,1024,0.2088
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,4096,0.3525
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,2048,0.2240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,8192,0.2525
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,16384,0.2698
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,32768,0.3023
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,131072,0.5432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,2,0.2155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,4,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,8,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,32,1,1,65536,0.3849
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,16,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,32,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,64,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,256,0.2220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,128,0.2089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,512,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,1024,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,2048,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,4096,0.3690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,8192,0.2696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,16384,0.2858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,32768,0.3320
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,65536,0.4347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,4,0.2476
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,64,1,1,131072,0.6333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,2,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,8,0.2470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,16,0.2494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,32,0.2464
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,64,0.2570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,128,0.2484
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,256,0.2523
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,512,0.2656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,1024,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,2048,0.2816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,4096,0.4201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,8192,0.3217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,16384,0.3474
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,32768,0.4214
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,65536,0.5645
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,2,0.3165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,128,1,1,131072,0.8471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,8,0.3190
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,4,0.3191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,16,0.3169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,32,0.3410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,64,0.3171
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,128,0.3221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,512,0.3327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,256,0.3349
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,1024,0.3486
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,4096,0.6228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,8192,0.4338
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,2048,0.3702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,16384,0.4899
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,32768,0.6106
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,65536,0.8388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,2,0.4250
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,4,0.5009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,16,0.4253
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,8,0.4256
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,256,1,1,131072,1.4090
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,32,0.4258
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,64,0.4309
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,128,0.4332
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,256,0.4575
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,512,0.4679
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,2048,0.5240
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,1024,0.4763
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,4096,0.9124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,8192,0.6391
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,16384,0.7640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,32768,0.9974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,512,1,1,65536,1.4604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,2,0.6659
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,4,0.6665
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,8,0.6655
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,16,0.6701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,32,0.6726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,64,0.6845
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,128,0.6824
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,256,0.6967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,512,0.7416
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,1024,0.7650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,2048,0.8723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,4096,1.4208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,8192,1.0909
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,2,0.2564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,16384,1.3350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,4,0.2452
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,float16,nvfp4,1,1024,1,1,32768,1.7839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,8,0.2432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,16,0.2469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,32,0.2460
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,64,0.2446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,128,0.2400
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,256,0.2464
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,512,0.2439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,1024,0.2405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,2048,0.2437
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,4096,0.3783
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,8192,0.2612
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,16384,0.2776
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,32768,0.3058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,65536,0.3662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1,1,1,131072,0.4687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,2,0.2604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,4,0.2544
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,8,0.2616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,16,0.2524
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,32,0.2593
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,64,0.2608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,128,0.2561
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,256,0.2533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,512,0.2569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,1024,0.2588
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,2048,0.2617
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,8192,0.2829
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,4096,0.2690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,16384,0.2940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,32768,0.3197
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,65536,0.3742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,2,1,1,131072,0.4946
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,4,0.2746
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,2,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,8,0.2705
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,16,0.2652
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,32,0.2690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,64,0.2707
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,128,0.2651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,256,0.2710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,512,0.2723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,1024,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,2048,0.2727
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,4096,0.2817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,8192,0.2937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,16384,0.3068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,32768,0.3235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,65536,0.3967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,4,1,1,131072,0.5068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,2,0.2786
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,4,0.2799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,8,0.2729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,16,0.2808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,32,0.2820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,64,0.2722
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,128,0.2821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,256,0.2818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,512,0.2797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,1024,0.2805
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,2048,0.2738
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,4096,0.4120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,8192,0.2964
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,16384,0.3175
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,32768,0.3432
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,65536,0.4108
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,8,1,1,131072,0.5299
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,4,0.2967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,2,0.3009
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,8,0.3014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,16,0.2977
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,64,0.3040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,128,0.3022
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,32,0.3025
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,512,0.2982
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,256,0.2987
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,1024,0.3017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,2048,0.3018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,4096,0.4275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,8192,0.3278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,16384,0.3385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,32768,0.3678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,65536,0.4415
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,16,1,1,131072,0.5719
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,2,0.3153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,4,0.3172
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,8,0.3128
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,16,0.3185
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,32,0.3172
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,64,0.3160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,128,0.3111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,256,0.3164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,1024,0.3161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,512,0.3155
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,2048,0.3130
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,4096,0.4514
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,8192,0.3439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,16384,0.3587
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,32768,0.3934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,65536,0.4833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,32,1,1,131072,0.6365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,4,0.3615
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,8,0.3669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,2,0.3653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,16,0.3654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,32,0.3667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,64,0.3601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,128,0.3650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,256,0.3649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,512,0.3613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,1024,0.3667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,2048,0.3700
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,4096,0.5017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,8192,0.3943
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,16384,0.4192
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,32768,0.4684
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,65536,0.5723
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,2,0.4557
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,4,0.4560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,64,1,1,131072,0.7662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,8,0.4568
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,16,0.4503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,32,0.4537
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,64,0.4552
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,128,0.4555
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,256,0.4576
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,512,0.4522
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,2048,0.4669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,4096,0.5953
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,1024,0.4610
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,8192,0.5018
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,16384,0.5336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,32768,0.6020
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,65536,0.7493
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,2,0.6795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,4,0.6839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,8,0.6802
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,128,1,1,131072,1.0259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,16,0.6794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,32,0.6842
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,64,0.6847
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,128,0.6919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,256,0.6912
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,512,0.6979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,1024,0.6979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,2048,0.7094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,4096,0.9581
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,8192,0.7677
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,16384,0.8241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,32768,0.9500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,65536,1.1769
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,2,1.1272
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,256,1,1,131072,1.7389
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,4,1.1260
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,8,1.1231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,16,1.1273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,32,1.1294
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,64,1.1309
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,128,1.1417
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,256,1.1388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,512,1.1464
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,2048,1.1670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,1024,1.1500
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,4096,1.6563
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,8192,1.2710
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,16384,1.3985
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,32768,1.6406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,512,1,1,65536,2.1065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,2,1.9233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,4,1.9122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,8,1.9162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,16,1.9174
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,32,1.9150
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,64,1.9218
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,128,1.9217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,256,1.9120
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,512,1.9385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,2048,1.9895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,4096,2.5110
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,1024,1.9478
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,2,0.2205
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,8192,2.1706
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,4,0.2068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,16384,2.4231
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,8,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,32,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,128,1024,1,1,32768,2.8783
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,16,0.2056
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,64,0.2027
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,128,0.2115
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,256,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,512,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,1024,0.2061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,2048,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,4096,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,8192,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,16384,0.2370
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,32768,0.2681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,65536,0.3307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1,1,1,131072,0.4312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,2,0.2175
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,4,0.2186
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,16,0.2161
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,8,0.2137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,32,0.2204
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,64,0.2209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,128,0.2187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,256,0.2130
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,512,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,1024,0.2147
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,2048,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,4096,0.3468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,8192,0.2459
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,16384,0.2547
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,32768,0.2713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,65536,0.3454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,2,1,1,131072,0.4464
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,2,0.2248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,8,0.2245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,4,0.2235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,16,0.2279
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,32,0.2267
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,64,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,128,0.2255
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,256,0.2188
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,512,0.2259
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,1024,0.2252
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,2048,0.2243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,4096,0.3561
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,16384,0.2546
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,32768,0.2863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,8192,0.2513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,65536,0.3458
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,2,0.2306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,4,1,1,131072,0.4592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,4,0.2305
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,8,0.2336
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,16,0.2273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,32,0.2301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,64,0.2312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,128,0.2283
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,512,0.2315
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,1024,0.2295
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,256,0.2306
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,2048,0.2346
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,4096,0.3564
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,8192,0.2551
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,16384,0.2608
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,32768,0.2937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,65536,0.3601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,8,1,1,131072,0.4821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,2,0.2546
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,4,0.2524
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,8,0.2483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,16,0.2533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,32,0.2492
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,64,0.2537
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,256,0.2524
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,512,0.2550
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,1024,0.2525
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,128,0.2545
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,2048,0.2503
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,4096,0.3838
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,8192,0.2711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,16384,0.2902
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,32768,0.3228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,65536,0.3936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,2,0.2548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,4,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,8,0.2556
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,16,1,1,131072,0.5315
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,16,0.2514
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,32,0.2573
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,64,0.2553
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,128,0.2539
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,256,0.2565
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,512,0.2527
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,1024,0.2570
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,2048,0.2581
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,4096,0.3848
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,8192,0.2842
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,16384,0.2988
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,32768,0.3354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,65536,0.4227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,32,1,1,131072,0.5725
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,2,0.2815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,4,0.2808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,8,0.2761
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,16,0.2840
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,32,0.2799
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,64,0.2798
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,128,0.2818
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,256,0.2778
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,512,0.2815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,1024,0.2828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,2048,0.2870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,8192,0.3144
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,4096,0.4126
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,16384,0.3347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,32768,0.3841
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,65536,0.4845
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,2,0.3262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,4,0.3271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,64,1,1,131072,0.6855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,8,0.3249
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,16,0.3223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,32,0.3258
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,64,0.3265
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,128,0.3228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,256,0.3268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,512,0.3289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,1024,0.3287
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,2048,0.3312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,4096,0.4647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,8192,0.3694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,16384,0.4011
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,32768,0.4701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,65536,0.6131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,128,1,1,131072,0.9001
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,2,0.4445
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,4,0.4442
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,8,0.4409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,16,0.4439
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,32,0.4440
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,64,0.4456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,128,0.4435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,256,0.4458
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,512,0.4466
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,1024,0.4491
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,2048,0.4572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,4096,0.7103
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,8192,0.5152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,16384,0.5736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,32768,0.6925
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,65536,0.9309
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,256,1,1,131072,1.4981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,2,0.6702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,4,0.6682
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,8,0.6701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,32,0.6726
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,16,0.6728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,64,0.6797
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,128,0.6847
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,256,0.6891
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,512,0.6882
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,1024,0.6971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,2048,0.7099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,4096,1.0894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,8192,0.8148
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,16384,0.9414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,32768,1.1764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,512,1,1,65536,1.6450
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,2,1.0871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,4,1.0952
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,8,1.0948
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,16,1.0973
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,32,1.0979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,64,1.1003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,128,1.1040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,256,1.1036
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,512,1.1065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,1024,1.1271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,2048,1.1560
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,4096,1.6884
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,8192,1.3524
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,16384,1.5927
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,2,0.1900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,64,1024,1,1,32768,2.0569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,4,0.1830
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,8,0.1876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,16,0.1861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,32,0.1900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,64,0.1892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,128,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,256,0.1896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,1024,0.1871
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,512,0.1808
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,2048,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,4096,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,8192,0.2096
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,32768,0.2475
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,16384,0.2138
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,65536,0.3123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1,1,1,131072,0.4125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,2,0.1924
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,4,0.1992
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,8,0.1989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,16,0.2039
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,32,0.1932
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,64,0.1994
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,128,0.1957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,256,0.1960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,512,0.1925
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,1024,0.1985
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,2048,0.2043
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,4096,0.2093
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,8192,0.2144
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,16384,0.2264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,32768,0.2499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,65536,0.3254
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,2,1,1,131072,0.4322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,2,0.1994
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,4,0.2069
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,8,0.1983
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,16,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,32,0.2079
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,64,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,128,0.2054
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,512,0.2077
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,256,0.2033
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,1024,0.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,2048,0.1989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,4096,0.3386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,8192,0.2368
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,16384,0.2455
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,32768,0.2633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,65536,0.3384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,4,1,1,131072,0.4513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,2,0.2157
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,4,0.2064
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,8,0.2134
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,16,0.2165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,32,0.2150
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,64,0.2104
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,128,0.2176
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,512,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,256,0.2127
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,1024,0.2094
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,2048,0.2175
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,4096,0.3494
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,8192,0.2448
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,16384,0.2522
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,32768,0.2828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,65536,0.3516
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,8,1,1,131072,0.4727
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,2,0.2279
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,4,0.2370
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,8,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,16,0.2308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,32,0.2374
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,64,0.2350
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,128,0.2380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,256,0.2330
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,512,0.2304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,1024,0.2384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,2048,0.2423
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,8192,0.2674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,4096,0.3678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,16384,0.2792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,32768,0.3111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,65536,0.3824
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,16,1,1,131072,0.5131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,2,0.2384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,4,0.2394
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,8,0.2406
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,16,0.2371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,32,0.2378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,64,0.2409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,128,0.2364
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,256,0.2375
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,512,0.2393
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,2048,0.2476
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,1024,0.2434
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,4096,0.3721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,8192,0.2696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,16384,0.2878
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,32768,0.3234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,65536,0.4055
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,32,1,1,131072,0.5654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,2,0.2632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,4,0.2605
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,8,0.2634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,16,0.2626
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,32,0.2672
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,64,0.2617
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,256,0.2653
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,128,0.2607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,1024,0.2676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,2048,0.2692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,4096,0.4026
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,512,0.2624
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,8192,0.3015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,16384,0.3211
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,32768,0.3697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,65536,0.4719
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,64,1,1,131072,0.6719
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,4,0.3074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,2,0.3052
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,8,0.3066
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,16,0.3023
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,32,0.3058
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,64,0.3074
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,128,0.3038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,256,0.3084
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,512,0.3086
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,1024,0.3101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,2048,0.3156
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,4096,0.4471
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,8192,0.3506
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,16384,0.3823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,32768,0.4479
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,65536,0.5981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,2,0.4123
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,128,1,1,131072,0.8821
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,4,0.4125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,8,0.4104
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,32,0.4137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,16,0.4131
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,64,0.4127
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,128,0.4154
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,256,0.4163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,512,0.4190
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,1024,0.4209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,2048,0.4271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,4096,0.6784
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,8192,0.4857
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,16384,0.5475
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,32768,0.6675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,65536,0.9021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,2,0.6076
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,4,0.6062
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,256,1,1,131072,1.4677
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,8,0.6050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,16,0.6089
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,32,0.6110
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,64,0.6122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,128,0.6179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,256,0.6196
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,512,0.6193
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,1024,0.6273
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,2048,0.6416
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,4096,1.1297
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,16384,0.8728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,8192,0.7481
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,32768,1.1144
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,2,0.9846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,512,1,1,65536,1.5809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,4,0.9817
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,8,0.9833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,16,0.9867
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,32,0.9887
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,64,0.9939
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,128,0.9936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,256,0.9935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,512,1.0047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,1024,1.0149
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,2048,1.0446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,4096,1.5703
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,2,0.1712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,8192,1.2469
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,16384,1.4895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,4,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,32,1024,1,1,32768,1.9460
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,8,0.1779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,16,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,32,0.1693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,64,0.1697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,128,0.1801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,256,0.1819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,512,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,1024,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,2048,0.1777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,4096,0.1794
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,8192,0.1941
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,16384,0.2013
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,65536,0.3030
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,131072,0.4133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1,1,1,32768,0.2360
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,2,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,8,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,4,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,16,0.1835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,32,0.1834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,64,0.1916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,256,0.1929
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,128,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,512,0.1911
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,1024,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,2048,0.1920
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,4096,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,8192,0.2000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,16384,0.2213
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,32768,0.2516
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,65536,0.3107
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,2,1,1,131072,0.4208
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,4,0.1999
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,8,0.1863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,2,0.1960
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,16,0.1883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,32,0.1940
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,64,0.1974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,128,0.1938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,256,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,1024,0.1880
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,2048,0.1961
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,512,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,4096,0.3202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,8192,0.2206
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,16384,0.2248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,32768,0.2550
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,65536,0.3276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,4,1,1,131072,0.4358
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,2,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,4,0.2015
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,8,0.2050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,16,0.2059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,32,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,64,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,128,0.2078
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,256,0.1967
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,512,0.2000
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,1024,0.2006
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,2048,0.2040
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,4096,0.3380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,8192,0.2321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,16384,0.2458
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,65536,0.3380
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,131072,0.4596
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,8,1,1,32768,0.2754
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,2,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,4,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,8,0.2260
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,16,0.2243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,32,0.2274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,64,0.2204
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,128,0.2163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,256,0.2254
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,512,0.2211
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,1024,0.2264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,2048,0.2307
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,4096,0.3615
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,8192,0.2586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,16384,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,32768,0.2974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,65536,0.3779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,16,1,1,131072,0.5117
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,2,0.2261
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,4,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,8,0.2282
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,32,0.2234
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,16,0.2289
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,64,0.2216
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,256,0.2297
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,128,0.2264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,512,0.2268
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,1024,0.2303
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,2048,0.2351
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,4096,0.3622
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,8192,0.2651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,16384,0.2757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,32768,0.3165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,65536,0.4021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,32,1,1,131072,0.5557
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,2,0.2489
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,4,0.2499
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,8,0.2445
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,16,0.2507
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,32,0.2464
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,64,0.2497
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,128,0.2504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,256,0.2495
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,512,0.2528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,1024,0.2562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,2048,0.2544
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,4096,0.3925
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,8192,0.2855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,16384,0.3100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,32768,0.3591
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,65536,0.4632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,64,1,1,131072,0.6640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,2,0.2856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,4,0.2819
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,8,0.2888
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,16,0.2816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,32,0.2856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,64,0.2863
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,128,0.2850
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,512,0.2945
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,1024,0.2900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,256,0.2897
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,2048,0.2997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,4096,0.4309
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,8192,0.3363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,16384,0.3687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,32768,0.4319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,65536,0.5807
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,2,0.3876
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,128,1,1,131072,0.8662
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,4,0.3883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,8,0.3858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,16,0.3881
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,32,0.3895
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,64,0.3873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,128,0.3884
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,512,0.3938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,256,0.3920
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,2048,0.4044
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,1024,0.3976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,4096,0.6536
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,8192,0.4640
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,16384,0.5171
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,32768,0.6418
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,2,0.5562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,4,0.5569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,131072,1.4468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,256,1,1,65536,0.8795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,8,0.5577
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,16,0.5574
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,32,0.5566
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,64,0.5604
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,128,0.5661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,256,0.5669
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,512,0.5702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,1024,0.5763
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,2048,0.5934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,4096,0.9701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,16384,0.8205
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,8192,0.6938
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,32768,1.0607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,512,1,1,65536,1.5260
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,2,0.8756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,4,0.8778
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,8,0.8771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,16,0.8770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,32,0.8775
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,64,0.8782
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,128,0.8855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,256,0.8862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,512,0.8957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,2048,0.9396
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,1024,0.9116
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,4096,1.4529
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,16384,1.3761
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,2,0.1787
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,8192,1.1327
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,4,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,16,1024,1,1,32768,1.8402
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,8,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,32,0.1731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,16,0.1764
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,64,0.1649
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,128,0.1802
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,256,0.1681
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,512,0.1749
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,1024,0.1635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,2048,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,4096,0.1854
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,8192,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,16384,0.1951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,32768,0.2276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,65536,0.2937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1,1,1,131072,0.4080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,2,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,4,0.1883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,8,0.1869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,16,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,32,0.1785
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,64,0.1853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,128,0.1851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,256,0.1896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,512,0.1792
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,1024,0.1862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,2048,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,4096,0.3165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,8192,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,16384,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,65536,0.3145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,32768,0.2399
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,2,1,1,131072,0.4080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,2,0.1955
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,4,0.1914
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,8,0.1937
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,16,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,32,0.1936
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,64,0.1954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,128,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,256,0.1813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,512,0.1900
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,1024,0.1916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,2048,0.1956
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,4096,0.3167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,8192,0.2193
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,16384,0.2322
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,32768,0.2544
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,65536,0.3047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,4,1,1,131072,0.4417
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,2,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,8,0.1929
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,4,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,16,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,32,0.2019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,64,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,128,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,256,0.1995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,512,0.1912
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,1024,0.1997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,2048,0.2031
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,4096,0.3324
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,8192,0.2274
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,16384,0.2364
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,32768,0.2625
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,65536,0.3409
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,8,1,1,131072,0.4538
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,2,0.2228
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,4,0.2210
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,8,0.2221
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,16,0.2229
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,32,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,64,0.2133
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,128,0.2187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,256,0.2121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,512,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,1024,0.2223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,2048,0.2263
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,4096,0.3594
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,8192,0.2475
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,16384,0.2592
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,32768,0.2902
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,65536,0.3728
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,16,1,1,131072,0.5097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,2,0.2243
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,4,0.2235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,8,0.2248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,16,0.2165
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,32,0.2217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,64,0.2185
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,128,0.2220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,256,0.2237
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,512,0.2252
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,1024,0.2262
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,2048,0.2326
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,4096,0.3586
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,8192,0.2576
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,16384,0.2715
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,32768,0.3140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,65536,0.3975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,2,0.2427
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,32,1,1,131072,0.5534
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,4,0.2456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,8,0.2366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,16,0.2424
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,32,0.2375
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,64,0.2435
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,128,0.2430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,256,0.2456
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,512,0.2459
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,1024,0.2495
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,2048,0.2498
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,4096,0.3859
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,8192,0.2815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,16384,0.3062
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,32768,0.3540
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,65536,0.4597
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,64,1,1,131072,0.6558
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,2,0.2757
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,4,0.2713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,8,0.2700
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,16,0.2756
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,32,0.2773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,64,0.2759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,128,0.2768
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,256,0.2774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,512,0.2851
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,1024,0.2832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,2048,0.2866
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,4096,0.4269
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,8192,0.3304
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,16384,0.3607
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,32768,0.4284
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,65536,0.5742
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,128,1,1,131072,0.8583
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,2,0.3717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,4,0.3714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,8,0.3746
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,16,0.3746
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,32,0.3748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,64,0.3753
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,128,0.3760
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,256,0.3790
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,512,0.3773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,1024,0.3806
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,2048,0.3919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,4096,0.6430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,8192,0.4492
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,16384,0.5073
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,32768,0.6297
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,65536,0.8635
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,2,0.5309
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,4,0.5319
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,256,1,1,131072,1.4325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,8,0.5339
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,16,0.5345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,32,0.5356
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,64,0.5371
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,128,0.5410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,256,0.5422
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,512,0.5443
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,2048,0.5674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,1024,0.5530
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,4096,0.9454
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,8192,0.6731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,16384,0.7995
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,32768,1.0367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,512,1,1,65536,1.5004
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,2,0.8219
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,8,0.8245
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,4,0.8251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,16,0.8253
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,32,0.8275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,64,0.8318
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,128,0.8313
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,256,0.8321
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,512,0.8405
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,1024,0.8577
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,2048,0.8836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,4096,1.4209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,16384,1.3276
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,8192,1.0861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,8,1024,1,1,32768,1.7931
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,2,0.1634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,4,0.1687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,8,0.1647
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,16,0.1721
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,32,0.1692
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,64,0.1729
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,128,0.1719
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,256,0.1637
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,512,0.1740
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,1024,0.1687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,2048,0.1648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,4096,0.3002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,16384,0.2017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,8192,0.1894
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,32768,0.2278
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,65536,0.2806
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1,1,1,131072,0.4004
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,2,0.1810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,4,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,8,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,16,0.1835
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,32,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,64,0.1830
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,128,0.1740
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,256,0.1798
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,512,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,1024,0.1751
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,2048,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,4096,0.3124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,8192,0.2021
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,16384,0.2028
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,32768,0.2387
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,65536,0.3047
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,2,1,1,131072,0.4059
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,2,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,4,0.1918
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,16,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,8,0.1920
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,32,0.1934
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,64,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,128,0.1933
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,256,0.1834
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,512,0.1888
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,1024,0.1896
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,2048,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,4096,0.3236
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,8192,0.2106
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,16384,0.2227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,32768,0.2518
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,65536,0.3068
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,4,1,1,131072,0.4347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,2,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,4,0.1901
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,8,0.1988
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,16,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,32,0.2003
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,64,0.1984
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,128,0.1893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,256,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,512,0.1984
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,1024,0.1915
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,2048,0.1998
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,4096,0.3286
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,8192,0.2204
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,16384,0.2323
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,32768,0.2562
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,65536,0.3347
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,8,1,1,131072,0.4632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,2,0.2097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,4,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,16,0.2204
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,8,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,32,0.2191
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,64,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,128,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,256,0.2100
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,512,0.2189
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,1024,0.2202
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,2048,0.2244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,4096,0.3513
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,8192,0.2483
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,16384,0.2548
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,32768,0.2949
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,65536,0.3639
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,16,1,1,131072,0.5061
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,2,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,4,0.2211
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,8,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,16,0.2135
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,64,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,32,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,128,0.2114
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,256,0.2200
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,512,0.2203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,1024,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,2048,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,4096,0.3572
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,8192,0.2496
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,16384,0.2711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,32768,0.3099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,65536,0.3962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,32,1,1,131072,0.5501
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,2,0.2410
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,4,0.2345
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,8,0.2329
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,16,0.2386
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,32,0.2385
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,64,0.2413
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,128,0.2403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,256,0.2407
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,512,0.2429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,1024,0.2429
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,2048,0.2461
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,4096,0.3823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,8192,0.2823
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,16384,0.3029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,32768,0.3525
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,65536,0.4556
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,64,1,1,131072,0.6537
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,2,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,4,0.2661
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,8,0.2700
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,16,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,32,0.2730
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,64,0.2698
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,128,0.2713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,256,0.2759
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,512,0.2733
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,1024,0.2795
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,2048,0.2862
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,4096,0.4251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,8192,0.3275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,32768,0.4251
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,65536,0.5717
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,131072,0.8470
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,2,0.3648
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,4,0.3656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,128,1,1,16384,0.3569
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,16,0.3686
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,8,0.3677
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,32,0.3680
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,64,0.3703
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,128,0.3665
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,256,0.3701
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,512,0.3741
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,1024,0.3778
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,2048,0.3893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,4096,0.6384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,8192,0.4455
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,16384,0.4990
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,32768,0.6223
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,65536,0.8658
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,2,0.5198
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,256,1,1,131072,1.4264
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,4,0.5215
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,8,0.5209
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,16,0.5199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,32,0.5203
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,64,0.5244
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,128,0.5297
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,256,0.5309
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,512,0.5325
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,2048,0.5567
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,1024,0.5403
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,8192,0.6589
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,4096,0.9353
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,16384,0.7838
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,32768,1.0233
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,512,1,1,65536,1.4893
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,2,0.7986
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,4,0.7990
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,8,0.7997
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,16,0.7958
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,32,0.8031
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,64,0.8048
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,128,0.8063
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,256,0.8071
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,512,0.8152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,2048,0.8601
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,4096,1.3898
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,1024,0.8333
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,16384,1.3035
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,2,0.1668
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,8192,1.0618
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,4,1024,1,1,32768,1.7639
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,4,0.1716
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,8,0.1696
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,16,0.1650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,64,0.1712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,32,0.1687
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,128,0.1711
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,1024,0.1634
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,256,0.1678
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,2048,0.1688
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,512,0.1695
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,4096,0.3002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,8192,0.1827
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,16384,0.1965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,32768,0.2178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,2,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,131072,0.3892
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1,1,1,65536,0.2878
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,4,0.1844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,8,0.1691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,32,0.1770
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,16,0.1839
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,64,0.1673
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,128,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,256,0.1750
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,512,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,1024,0.1844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,2048,0.1691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,4096,0.1899
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,8192,0.2037
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,16384,0.1965
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,32768,0.2365
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,65536,0.3017
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,2,0.1872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,2,1,1,131072,0.4121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,4,0.1807
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,8,0.1878
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,16,0.1879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,32,0.1788
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,64,0.1884
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,128,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,256,0.1890
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,512,0.1855
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,1024,0.1773
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,2048,0.1874
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,4096,0.3167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,8192,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,16384,0.2141
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,32768,0.2446
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,131072,0.4320
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,4,1,1,65536,0.3097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,2,0.1902
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,4,0.1966
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,8,0.2008
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,16,0.1902
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,32,0.1978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,64,0.1976
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,128,0.1974
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,256,0.1913
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,512,0.1989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,1024,0.1979
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,2048,0.2016
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,4096,0.3185
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,8192,0.2179
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,16384,0.2224
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,65536,0.3366
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,131072,0.4606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,2,0.2170
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,4,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,8,1,1,32768,0.2613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,8,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,16,0.2183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,32,0.2119
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,64,0.2177
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,128,0.2178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,512,0.2201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,1024,0.2181
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,2048,0.2130
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,256,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,4096,0.3442
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,8192,0.2458
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,16384,0.2582
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,32768,0.2981
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,65536,0.3704
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,16,1,1,131072,0.5029
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,2,0.2105
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,4,0.2164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,8,0.2122
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,16,0.2163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,32,0.2217
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,64,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,128,0.2164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,256,0.2167
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,512,0.2124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,1024,0.2190
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,2048,0.2192
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,4096,0.3557
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,8192,0.2546
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,16384,0.2702
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,32768,0.3064
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,65536,0.3889
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,2,0.2369
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,32,1,1,131072,0.5472
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,4,0.2315
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,8,0.2367
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,16,0.2377
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,32,0.2377
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,64,0.2355
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,128,0.2308
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,256,0.2361
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,512,0.2384
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,1024,0.2388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,2048,0.2493
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,4096,0.3800
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,8192,0.2801
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,16384,0.3014
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,32768,0.3442
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,65536,0.4533
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,64,1,1,131072,0.6504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,2,0.2633
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,4,0.2695
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,8,0.2693
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,16,0.2689
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,32,0.2631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,64,0.2690
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,256,0.2712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,128,0.2695
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,512,0.2731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,1024,0.2820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,2048,0.2860
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,4096,0.4235
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,16384,0.3504
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,32768,0.4248
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,65536,0.5720
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,8192,0.3206
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,2,0.3616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,128,1,1,131072,0.8528
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,4,0.3632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,16,0.3650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,8,0.3599
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,32,0.3650
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,64,0.3632
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,128,0.3623
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,256,0.3694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,512,0.3700
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,1024,0.3748
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,2048,0.3853
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,4096,0.6312
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,16384,0.4971
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,8192,0.4411
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,32768,0.6207
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,65536,0.8558
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,2,0.5125
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,256,1,1,131072,1.4201
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,4,0.5159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,8,0.5121
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,16,0.5145
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,64,0.5176
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,32,0.5159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,128,0.5227
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,256,0.5253
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,512,0.5275
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,2048,0.5517
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,1024,0.5354
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,4096,0.9290
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,16384,0.7776
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,8192,0.6532
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,32768,1.0187
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,512,1,1,65536,1.4858
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,2,0.7872
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,4,0.7846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,8,0.7864
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,16,0.7864
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,32,0.7869
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,64,0.7919
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,128,0.7963
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,256,0.7962
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,2048,0.8477
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,512,0.8050
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,1024,0.8216
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,4096,1.3732
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,8192,1.0420
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,16384,1.2883
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,2,0.1656
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,4,0.1631
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,2,1024,1,1,32768,1.7567
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,8,0.1674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,16,0.1691
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,32,0.1641
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,64,0.1630
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,1024,0.1667
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,512,0.1628
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,256,0.1660
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,128,0.1673
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,2048,0.1680
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,4096,0.1829
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,8192,0.1820
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,16384,0.1916
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,32768,0.2178
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,131072,0.3903
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,4,0.1815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,2,0.1812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1,1,1,65536,0.2816
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,8,0.1814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,16,0.1767
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,32,0.1713
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,64,0.1712
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,256,0.1814
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,128,0.1793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,512,0.1828
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,1024,0.1791
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,4096,0.1852
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,2048,0.1815
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,8192,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,16384,0.1996
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,32768,0.2382
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,65536,0.2978
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,2,1,1,131072,0.4111
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,2,0.1833
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,4,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,16,0.1832
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,8,0.1846
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,64,0.1856
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,32,0.1771
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,512,0.1831
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,128,0.1830
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,1024,0.1774
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,256,0.1810
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,2048,0.1809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,4096,0.3153
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,16384,0.2057
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,8192,0.2019
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,32768,0.2363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,65536,0.3065
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,4,1,1,131072,0.4277
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,2,0.1944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,4,0.1908
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,8,0.1975
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,16,0.1873
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,64,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,128,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,32,0.1885
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,256,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,512,0.1944
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,1024,0.1935
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,2048,0.1954
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,4096,0.3124
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,8192,0.2080
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,16384,0.2242
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,32768,0.2573
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,131072,0.4580
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,2,0.2182
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,4,0.2140
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,8,1,1,65536,0.3302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,8,0.2085
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,16,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,32,0.2176
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,128,0.2159
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,256,0.2164
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,64,0.2180
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,512,0.2151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,1024,0.2115
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,2048,0.2184
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,4096,0.3430
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,8192,0.2447
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,32768,0.2957
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,16384,0.2573
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,131072,0.5032
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,2,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,16,1,1,65536,0.3697
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,4,0.2163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,8,0.2099
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,16,0.2168
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,32,0.2160
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,64,0.2162
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,128,0.2139
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,512,0.2169
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,256,0.2101
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,1024,0.2130
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,2048,0.2241
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,4096,0.3567
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,8192,0.2511
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,16384,0.2694
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,32768,0.3038
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,65536,0.3859
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,2,0.2302
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,32,1,1,131072,0.5453
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,4,0.2328
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,8,0.2363
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,16,0.2352
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,32,0.2281
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,128,0.2342
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,64,0.2343
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,256,0.2301
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,512,0.2378
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,1024,0.2414
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,2048,0.2455
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,4096,0.3777
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,8192,0.2735
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,16384,0.3002
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,32768,0.3468
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,65536,0.4497
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,64,1,1,131072,0.6493
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,2,0.2674
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,4,0.2673
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,16,0.2613
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,8,0.2685
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,32,0.2654
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,128,0.2671
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,256,0.2675
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,64,0.2615
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,512,0.2736
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,1024,0.2793
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,2048,0.2779
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,4096,0.4226
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,8192,0.3220
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,16384,0.3545
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,32768,0.4183
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,65536,0.5670
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,2,0.3617
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,8,0.3616
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,128,1,1,131072,0.8445
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,4,0.3606
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,16,0.3625
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,32,0.3621
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,64,0.3609
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,128,0.3642
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,256,0.3676
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,512,0.3651
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,1024,0.3714
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,2048,0.3844
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,4096,0.6340
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,16384,0.4951
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,8192,0.4397
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,32768,0.6174
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,65536,0.8537
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,256,1,1,131072,1.4163
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,2,0.5110
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,4,0.5097
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,8,0.5108
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,16,0.5110
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,32,0.5137
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,64,0.5143
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,128,0.5199
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,256,0.5206
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,512,0.5253
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,2048,0.5482
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,1024,0.5309
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,4096,0.9271
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,16384,0.7731
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,8192,0.6510
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,32768,1.0152
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,512,1,1,65536,1.4812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,2,0.7805
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,4,0.7812
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,8,0.7813
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,16,0.7809
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,32,0.7836
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,64,0.7879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,128,0.7879
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,256,0.7887
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,512,0.7989
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,2048,0.8388
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,1024,0.8151
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,4096,1.3861
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,16384,1.2870
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,8192,1.0465
VLLM,0.16.0,NVIDIA B200,dsa_generation_module,default,zai-org/GLM-5,GlmMoeDsaForCausalLM,float16,fp8,nvfp4,1,1024,1,1,32768,1.7431
